Example #1
# Load training images (reconstructed call; the excerpt begins mid-argument
# list, so training_set_path is assumed by analogy with test_set_path below)
training_set = train_datagen.flow_from_directory(training_set_path,
                                                 target_size=input_size,
                                                 batch_size=batch_size,
                                                 class_mode='binary')

test_set = test_datagen.flow_from_directory(test_set_path,
                                            target_size=input_size,
                                            batch_size=batch_size,
                                            class_mode='binary')

# Create a loss history
history = LossHistory()

# train model
classifier.fit_generator(training_set,
                         steps_per_epoch=8000 // batch_size,
                         epochs=90,
                         validation_data=test_set,
                         validation_steps=2000 // batch_size,
                         workers=12,
                         max_queue_size=100,
                         callbacks=[history])

# Serialize Model
ModelSerializer.serialize_model_json(classifier, 'loss_history',
                                     'loss_history_weights')

# Predict single cases
test_image_1 = image.load_img('dataset/single_prediction/cat_or_dog_1.jpg',
                              target_size=input_size)
test_image_2 = image.load_img('dataset/single_prediction/cat_or_dog_2.jpg',
                              target_size=input_size)

test_image_1 = image.img_to_array(test_image_1)
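
The excerpt ends before the prediction step. A minimal sketch of finishing it,
assuming classifier, training_set and input_size are defined as above:

import numpy as np

# Convert the second image as well, then add the batch dimension Keras expects
test_image_2 = image.img_to_array(test_image_2)
batch = np.expand_dims(test_image_1, axis=0)

result = classifier.predict(batch)
print(training_set.class_indices)  # check which folder maps to label 1
print('dog' if result[0][0] > 0.5 else 'cat')  # assumes {'cats': 0, 'dogs': 1}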
Example #2
class KerasModel:
    def __init__(self, img_size, img_channels=3, output_size=17):
        self.losses = []
        self.model = Sequential()
        self.model.add(
            BatchNormalization(input_shape=(img_size[0], img_size[1],
                                            img_channels)))

        self.model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
        self.model.add(Conv2D(32, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D(pool_size=2))
        self.model.add(Dropout(0.3))

        self.model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
        self.model.add(Conv2D(64, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D(pool_size=2))
        self.model.add(Dropout(0.3))

        self.model.add(Conv2D(128, (3, 3), padding='same', activation='relu'))
        self.model.add(Conv2D(128, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D(pool_size=2))
        self.model.add(Dropout(0.3))

        self.model.add(Conv2D(256, (3, 3), padding='same', activation='relu'))
        self.model.add(Conv2D(256, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D(pool_size=2))
        self.model.add(Dropout(0.3))

        self.model.add(Conv2D(512, (3, 3), padding='same', activation='relu'))
        self.model.add(Conv2D(512, (3, 3), activation='relu'))
        self.model.add(MaxPooling2D(pool_size=2))
        self.model.add(Dropout(0.3))

        self.model.add(Flatten())

        self.model.add(Dense(512, activation='relu'))
        self.model.add(BatchNormalization())
        self.model.add(Dropout(0.5))

        self.model.add(Dense(output_size, activation='sigmoid'))

    def get_fbeta_score(self, validation_data, verbose=True):
        p_valid = self.model.predict(validation_data[0])
        thresholds = optimise_f2_thresholds(validation_data[1],
                                            p_valid,
                                            verbose=verbose)
        return fbeta_score(validation_data[1],
                           np.array(p_valid) > thresholds,
                           beta=2,
                           average='samples'), thresholds

    def fit(self,
            flow,
            epochs,
            lr,
            validation_data,
            train_callbacks=(),
            batches=300):
        history = LossHistory()
        fbeta = Fbeta(validation_data)
        opt = Adam(lr=lr)
        self.model.compile(loss='binary_crossentropy',
                           optimizer=opt,
                           metrics=['accuracy'])

        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')
        self.model.fit_generator(flow,
                                 steps_per_epoch=batches,
                                 epochs=epochs,
                                 callbacks=[history, earlyStopping, fbeta,
                                            *train_callbacks],
                                 validation_data=validation_data)
        fb_score, thresholds = self.get_fbeta_score(validation_data,
                                                    verbose=False)
        return [
            fbeta.fbeta, history.train_losses, history.val_losses, fb_score,
            thresholds
        ]

    def save_weights(self, weight_file_path):
        self.model.save_weights(weight_file_path)

    def load_weights(self, weight_file_path):
        self.model.load_weights(weight_file_path)

    def predict_image(self, image):
        # Test-time augmentation: average the model's predictions over the
        # original image and three rotated copies of it
        img = Image.fromarray(np.uint8(image * 255))
        images = [img.copy().rotate(i) for i in [-90, 90, 180]]
        images.append(img)
        images = np.asarray([
            np.asarray(rotated.convert("RGB"), dtype=np.float32) / 255
            for rotated in images
        ])
        return sum(self.model.predict(images)) / 4

    def predict(self, x_test):
        return [self.predict_image(img) for img in tqdm(x_test)]

    def map_predictions(self, predictions, labels_map, thresholds):
        predictions_labels = []
        for prediction in predictions:
            labels = [
                labels_map[i] for i, value in enumerate(prediction)
                if value > thresholds[i]
            ]
            predictions_labels.append(labels)
        return predictions_labels

    def close(self):
        backend.clear_session()
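
A minimal usage sketch, assuming X_train, y_train, X_valid, y_valid and X_test
are NumPy arrays and labels_map maps class indices to tag names (all names
here are illustrative, not from the original):

model = KerasModel(img_size=(64, 64))
flow = ImageDataGenerator(horizontal_flip=True).flow(X_train, y_train,
                                                     batch_size=128)
fbeta, train_losses, val_losses, score, thresholds = model.fit(
    flow, epochs=10, lr=0.001, validation_data=(X_valid, y_valid))
# predict() applies the four-rotation averaging from predict_image()
predictions = model.predict(X_test)
labels = model.map_predictions(predictions, labels_map, thresholds)
model.close()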
Example #3
class AmazonKerasClassifier:
    def __init__(self):
        self.losses = []
        self.classifier = Sequential()
        self.x_vail = []
        self.y_vail = []
        self.train_filepath = ''
        self.train_img_filepath = ''
        self.valid_filepath = ''
        self.valid_img_filepath = ''
        self.test_img_filepath = ''
        self.test_addition_img_filepath = ''
        self.test_img_name_list = ''
        self.y_map = {}

    def setTrainFilePath(self, value):
        self.train_filepath = value

    def getTrainFilePath(self):
        return self.train_filepath

    def setValidFilePath(self, value):
        self.valid_filepath = value

    def getValidFilePath(self):
        return self.valid_filepath

    def setTrainImgFilePath(self, value):
        self.train_img_filepath = value

    def getTrainImgFilePath(self):
        return self.train_img_filepath

    def setValidImgFilePath(self, value):
        self.valid_img_filepath = value

    def getValidImgFilePath(self):
        return self.valid_img_filepath

    def setTestImgFilePath(self, value):
        self.test_img_filepath = value

    def getTestImgFilePath(self):
        return self.test_img_filepath

    def setTestAdditionImgFilePath(self, value):
        self.test_addition_img_filepath = value

    def getTestAdditionImgFilePath(self):
        return self.test_addition_img_filepath

    def getTestImgNameList(self):
        return self.test_img_name_list

    def getYMap(self):
        return self.y_map

    def vgg(self,
            type=16,
            bn=False,
            img_size=(224, 224),
            img_channels=3,
            output_size=1000):
        if type == 16 and not bn:
            layer_list = vgg.vgg16(num_classes=output_size)
        elif type == 16 and bn:
            layer_list = vgg.vgg16_bn(num_classes=output_size)
        elif type == 11 and not bn:
            layer_list = vgg.vgg11(num_classes=output_size)
        elif type == 11 and bn:
            layer_list = vgg.vgg11_bn(num_classes=output_size)
        elif type == 13 and not bn:
            layer_list = vgg.vgg13(num_classes=output_size)
        elif type == 13 and bn:
            layer_list = vgg.vgg13_bn(num_classes=output_size)
        elif type == 19 and not bn:
            layer_list = vgg.vgg19(num_classes=output_size)
        elif type == 19 and bn:
            layer_list = vgg.vgg19_bn(num_classes=output_size)
        else:
            print("Please enter one of the four numbers: 11, 13, 16 or 19!")
            return
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))
        for i, value in enumerate(layer_list):
            self.classifier.add(eval(value))

    def squeezenet(self,
                   type,
                   img_size=(64, 64),
                   img_channels=3,
                   output_size=1000):
        input_shape = Input(shape=(*img_size, img_channels))
        if type == 1:
            x = squeezenet.squeezenet1_0(input_shape, num_classes=output_size)
        elif type == 1.1:
            x = squeezenet.squeezenet1_1(input_shape, num_classes=output_size)
        else:
            print("Please enter one of the two numbers: 1 or 1.1!")
            return
        model = Model(inputs=input_shape, outputs=x)
        self.classifier = model

    def resnet(self,
               type,
               img_size=(64, 64),
               img_channels=3,
               output_size=1000):
        input_shape = Input(shape=(*img_size, img_channels))
        if type == 18:
            x = resnet.resnet18(input_shape, num_classes=output_size)
        elif type == 34:
            x = resnet.resnet34(input_shape, num_classes=output_size)
        elif type == 50:
            x = resnet.resnet50(input_shape, num_classes=output_size)
        elif type == 101:
            x = resnet.resnet101(input_shape, num_classes=output_size)
        elif type == 152:
            x = resnet.resnet152(input_shape, num_classes=output_size)
        else:
            print("请输入18,34,50,101,152这五个数字中的一个!")
            return
        model = Model(inputs=input_shape, outputs=x)
        self.classifier = model

    def inception(self, img_size=(299, 299), img_channels=3, output_size=1000):
        input_shape = Input(shape=(*img_size, img_channels))
        x = inception.inception_v3(input_shape,
                                   num_classes=output_size,
                                   aux_logits=True,
                                   transform_input=False)
        model = Model(inputs=input_shape, outputs=x)
        self.classifier = model

    def densenet(self,
                 type,
                 img_size=(299, 299),
                 img_channels=3,
                 output_size=1000):
        input_shape = Input(shape=(*img_size, img_channels))
        if type == 161:
            x = densenet.densenet161(input_shape, num_classes=output_size)
        elif type == 121:
            x = densenet.densenet121(input_shape, num_classes=output_size)
        elif type == 169:
            x = densenet.densenet169(input_shape, num_classes=output_size)
        elif type == 201:
            x = densenet.densenet201(input_shape, num_classes=output_size)
        else:
            print("请输入161,121,169,201这四个数字中的一个!")
            return
        model = Model(inputs=input_shape, outputs=x)
        self.classifier = model

    def alexnet(self, img_size=(299, 299), img_channels=3, output_size=1000):
        input_shape = Input(shape=(*img_size, img_channels))
        x = alexnet.alexnet(input_shape, num_classes=output_size)
        model = Model(inputs=input_shape, outputs=x)
        self.classifier = model

    def add_conv_layer(self, img_size=(32, 32), img_channels=3):
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))

        self.classifier.add(
            Conv2D(32, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(32, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))

        self.classifier.add(
            Conv2D(64, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(64, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))

        self.classifier.add(
            Conv2D(128, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(128, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))

        self.classifier.add(
            Conv2D(256, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(256, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))

    def add_flatten_layer(self):
        self.classifier.add(Flatten())

    def add_ann_layer(self, output_size):
        self.classifier.add(Dense(512, activation='relu'))
        self.classifier.add(BatchNormalization())
        self.classifier.add(Dropout(0.5))
        self.classifier.add(Dense(output_size, activation='sigmoid'))

    def _get_fbeta_score2(self, classifier, X_valid, y_valid):
        p_valid = classifier.predict(X_valid)
        result_threshold_list_final, score_result = self.grid_search_best_threshold(
            y_valid, np.array(p_valid))
        return result_threshold_list_final, score_result

    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        p_valid = classifier.predict(X_valid)
        return fbeta_score(y_valid,
                           np.array(p_valid) > 0.2,
                           beta=2,
                           average='samples')

    def grid_search_best_threshold(self, y_valid, p_valid):
        threshold_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        result_threshold_list_temp = [0.2] * 17
        result_threshold_list_final = [0.2] * 17
        for i in range(17):
            score_result = 0
            for j in range(9):
                result_threshold_list_temp[i] = threshold_list[j]
                score_temp = fbeta_score(y_valid,
                                         p_valid > result_threshold_list_temp,
                                         beta=2,
                                         average='samples')
                if score_result < score_temp:
                    score_result = score_temp
                    result_threshold_list_final[i] = threshold_list[j]
            result_threshold_list_temp[i] = result_threshold_list_final[i]
        return result_threshold_list_final, score_result

    def train_model(self,
                    x_train,
                    y_train,
                    learn_rate=0.001,
                    epoch=5,
                    batch_size=128,
                    validation_split_size=0.2,
                    train_callbacks=()):
        history = LossHistory()

        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)

        self.x_vail = X_valid
        self.y_vail = y_valid
        opt = Adam(lr=learn_rate)

        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])

        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')

        self.classifier.fit(
            X_train,
            y_train,
            batch_size=batch_size,
            epochs=epoch,
            verbose=1,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, earlyStopping])
        fbeta_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        return [history.train_losses, history.val_losses, fbeta_score]

    def train_model_generator(self,
                              generator_train,
                              generator_valid,
                              learn_rate=0.001,
                              epoch=5,
                              batchSize=128,
                              steps=32383,
                              validation_steps=8096,
                              train_callbacks=()):
        history = LossHistory()
        # defaults: 32383 training samples, 8096 validation samples
        opt = Adam(lr=learn_rate)

        # Convert sample counts to batch counts (ceiling division); the
        # training step count additionally drops 9 batches
        if steps % batchSize == 0:
            steps = steps // batchSize - 9
        else:
            steps = steps // batchSize + 1 - 9
        if validation_steps % batchSize == 0:
            validation_steps = validation_steps // batchSize
        else:
            validation_steps = validation_steps // batchSize + 1

        print(steps, validation_steps)
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])

        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')

        self.classifier.fit_generator(
            generator_train,
            steps_per_epoch=steps,
            epochs=epoch,
            verbose=1,
            validation_data=generator_valid,
            validation_steps=validation_steps,
            callbacks=[history, *train_callbacks, earlyStopping])
        # Assumes x_vail/y_vail were populated beforehand (see train_model)
        fbeta_score = self._get_fbeta_score(self.classifier, self.x_vail,
                                            self.y_vail)
        return [history.train_losses, history.val_losses, fbeta_score]

    def generate_trainOrValid_img_from_file(self,
                                            train_set_folder,
                                            train_csv_file,
                                            img_resize=(32, 32),
                                            batchSize=128,
                                            process_count=cpu_count()):
        labels_df = pd.read_csv(train_csv_file)
        labels = sorted(
            set(
                chain.from_iterable(
                    [tags.split(" ") for tags in labels_df['tags'].values])))
        labels_map = {l: i for i, l in enumerate(labels)}

        files_path = []
        tags_list = []
        for file_name, tags in labels_df.values:
            files_path.append('{}/{}.jpg'.format(train_set_folder, file_name))
            tags_list.append(tags)

        X = []
        Y = []

        self.y_map = {v: k for k, v in labels_map.items()}
        with ThreadPoolExecutor(process_count) as pool:
            for img_array, targets in tqdm(pool.map(
                    self._train_transform_to_matrices,
                [(file_path, tag, labels_map, img_resize)
                 for file_path, tag in zip(files_path, tags_list)]),
                                           total=len(files_path)):
                X.append(img_array)
                Y.append(targets)
                # Emit a full batch, then start collecting the next one
                if len(X) == batchSize:
                    yield (np.array(X), np.array(Y))
                    X = []
                    Y = []

    def _train_transform_to_matrices(self, *args):
        file_path, tags, labels_map, img_resize = list(args[0])
        img = Image.open(file_path)
        img.thumbnail(img_resize)

        img_array = np.asarray(img.convert("RGB"), dtype=np.float32) / 255

        targets = np.zeros(len(labels_map))
        for t in tags.split(' '):
            targets[labels_map[t]] = 1
        return img_array, targets

    def generate_test_img_from_file(self,
                                    test_set_folder,
                                    img_resize=(32, 32),
                                    batchSize=128,
                                    process_count=cpu_count()):
        x_test = []
        x_test_filename = []
        files_name = os.listdir(test_set_folder)

        X = []
        with ThreadPoolExecutor(process_count) as pool:
            for img_array, file_name in tqdm(pool.map(
                    self._test_transform_to_matrices,
                [(test_set_folder, file_name, img_resize)
                 for file_name in files_name]),
                                             total=len(files_name)):
                x_test.append(img_array)
                x_test_filename.append(file_name)
                self.test_img_name_list = x_test_filename

                X.append(img_array)
                # Test data carries no targets, so only image batches are
                # yielded here
                if len(X) == batchSize:
                    yield np.array(X)
                    X = []

    def _test_transform_to_matrices(self, *args):
        test_set_folder, file_name, img_resize = list(args[0])
        img = Image.open('{}/{}'.format(test_set_folder, file_name))
        img.thumbnail(img_resize)
        # Convert to RGB and normalize
        img_array = np.array(img.convert("RGB"), dtype=np.float32) / 255
        return img_array, file_name

    def save_weights(self, weight_file_path):
        self.classifier.save_weights(weight_file_path)

    def load_weights(self, weight_file_path):
        self.classifier.load_weights(weight_file_path)

    def setBestThreshold(self):
        result_threshold_list_final, score_result = self._get_fbeta_score2(
            self.classifier, self.x_vail, self.y_vail)
        print('Best score: {}'.format(score_result))
        print('Best thresholds: {}'.format(result_threshold_list_final))
        return result_threshold_list_final

    def predict(self, x_test):
        predictions = self.classifier.predict(x_test)
        return predictions

    def predict_generator(self, generator):
        predictions = self.classifier.predict_generator(generator)
        return predictions

    def map_predictions(self, predictions, labels_map, thresholds):
        predictions_labels = []
        for prediction in predictions:
            labels = [
                labels_map[i] for i, value in enumerate(prediction)
                if value > thresholds[i]
            ]
            predictions_labels.append(labels)

        return predictions_labels

    def close(self):
        backend.clear_session()
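
A minimal end-to-end sketch of this class, assuming x_train, y_train and
x_test are NumPy arrays and labels_map maps class indices to tag names (all
names here are illustrative):

classifier = AmazonKerasClassifier()
classifier.add_conv_layer(img_size=(64, 64))
classifier.add_flatten_layer()
classifier.add_ann_layer(output_size=17)
classifier.train_model(x_train, y_train, learn_rate=0.001, epoch=5)
# Tune one threshold per class on the held-out split kept by train_model()
thresholds = classifier.setBestThreshold()
predictions = classifier.predict(x_test)
labels = classifier.map_predictions(predictions, labels_map, thresholds)
classifier.close()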
Example #4
def main_fun(args, ctx):
    import numpy
    import os
    import tensorflow as tf
    import tensorflow.contrib.keras as keras
    from tensorflow.contrib.keras.api.keras import backend as K
    from tensorflow.contrib.keras.api.keras.models import Sequential, load_model, save_model
    from tensorflow.contrib.keras.api.keras.layers import Dense, Dropout
    from tensorflow.contrib.keras.api.keras.optimizers import RMSprop
    from tensorflow.contrib.keras.python.keras.callbacks import LambdaCallback, TensorBoard

    from tensorflow.python.saved_model import builder as saved_model_builder
    from tensorflow.python.saved_model import tag_constants
    from tensorflow.python.saved_model.signature_def_utils_impl import predict_signature_def

    from tensorflowonspark import TFNode

    cluster, server = TFNode.start_cluster_server(ctx)

    if ctx.job_name == "ps":
        server.join()
    elif ctx.job_name == "worker":

        def generate_rdd_data(tf_feed, batch_size):
            print("generate_rdd_data invoked")
            while True:
                batch = tf_feed.next_batch(batch_size)
                imgs = []
                lbls = []
                for item in batch:
                    imgs.append(item[0])
                    lbls.append(item[1])
                images = numpy.array(imgs).astype('float32') / 255
                labels = numpy.array(lbls).astype('float32')
                yield (images, labels)

        with tf.device(
                tf.train.replica_device_setter(
                    worker_device="/job:worker/task:%d" % ctx.task_index,
                    cluster=cluster)):

            IMAGE_PIXELS = 28
            batch_size = 100
            num_classes = 10

            # the data, shuffled and split between train and test sets
            if args.input_mode == 'tf':
                from tensorflow.contrib.keras.api.keras.datasets import mnist
                (x_train, y_train), (x_test, y_test) = mnist.load_data()
                x_train = x_train.reshape(60000, 784)
                x_test = x_test.reshape(10000, 784)
                x_train = x_train.astype('float32') / 255
                x_test = x_test.astype('float32') / 255

                # convert class vectors to binary class matrices
                y_train = keras.utils.to_categorical(y_train, num_classes)
                y_test = keras.utils.to_categorical(y_test, num_classes)
            else:  # args.mode == 'spark'
                x_train = tf.placeholder(tf.float32,
                                         [None, IMAGE_PIXELS * IMAGE_PIXELS],
                                         name="x_train")
                y_train = tf.placeholder(tf.float32, [None, 10],
                                         name="y_train")

            model = Sequential()
            model.add(Dense(512, activation='relu', input_shape=(784, )))
            model.add(Dropout(0.2))
            model.add(Dense(512, activation='relu'))
            model.add(Dropout(0.2))
            model.add(Dense(10, activation='softmax'))

            model.summary()

            model.compile(loss='categorical_crossentropy',
                          optimizer=RMSprop(),
                          metrics=['accuracy'])

        saver = tf.train.Saver()

        with tf.Session(server.target) as sess:
            K.set_session(sess)

            def save_checkpoint(epoch, logs=None):
                if epoch == 1:
                    tf.train.write_graph(sess.graph.as_graph_def(),
                                         args.model_dir, 'graph.pbtxt')
                saver.save(sess,
                           os.path.join(args.model_dir, 'model.ckpt'),
                           global_step=epoch * args.steps_per_epoch)

            ckpt_callback = LambdaCallback(on_epoch_end=save_checkpoint)
            tb_callback = TensorBoard(log_dir=args.model_dir,
                                      histogram_freq=1,
                                      write_graph=True,
                                      write_images=True)

            # add callbacks to save model checkpoint and tensorboard events (on worker:0 only)
            callbacks = [ckpt_callback,
                         tb_callback] if ctx.task_index == 0 else None

            if args.input_mode == 'tf':
                # train & validate on in-memory data
                history = model.fit(x_train,
                                    y_train,
                                    batch_size=batch_size,
                                    epochs=args.epochs,
                                    verbose=1,
                                    validation_data=(x_test, y_test),
                                    callbacks=callbacks)
            else:  # args.input_mode == 'spark':
                # train on data read from a generator which is producing data from a Spark RDD
                tf_feed = TFNode.DataFeed(ctx.mgr)
                history = model.fit_generator(
                    generator=generate_rdd_data(tf_feed, batch_size),
                    steps_per_epoch=args.steps_per_epoch,
                    epochs=args.epochs,
                    verbose=1,
                    callbacks=callbacks)

            if args.export_dir and ctx.job_name == 'worker' and ctx.task_index == 0:
                # save a local Keras model, so we can reload it with an inferencing learning_phase
                save_model(model, "tmp_model")

                # reload the model
                K.set_learning_phase(False)
                new_model = load_model("tmp_model")

                # export a saved_model for inferencing
                builder = saved_model_builder.SavedModelBuilder(
                    args.export_dir)
                signature = predict_signature_def(
                    inputs={'images': new_model.input},
                    outputs={'scores': new_model.output})
                builder.add_meta_graph_and_variables(
                    sess=sess,
                    tags=[tag_constants.SERVING],
                    signature_def_map={'predict': signature},
                    clear_devices=True)
                builder.save()

            if args.input_mode == 'spark':
                tf_feed.terminate()
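
Once exported, the SavedModel can be loaded back for serving. A minimal TF 1.x
sketch, assuming export_dir points at the directory written above and x_test
holds flattened 784-pixel images (both names are assumptions):

from tensorflow.contrib import predictor

# Build a callable from the 'predict' signature defined at export time
predict_fn = predictor.from_saved_model(export_dir, signature_def_key='predict')
scores = predict_fn({'images': x_test[:1]})['scores']
print(scores)  # softmax scores for the first test image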
Example #5
class AmazonKerasClassifier:
    def __init__(self):
        self.losses = []
        self.classifier = Sequential()

    def add_conv_layer(self, img_size=(32, 32), img_channels=3):
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))
        self.classifier.add(Conv2D(32, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=(2, 2)))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(Conv2D(64, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=(2, 2)))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(Conv2D(16, (2, 2), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=(2, 2)))
        self.classifier.add(Dropout(0.25))

    def add_flatten_layer(self):
        self.classifier.add(Flatten())

    def add_ann_layer(self, output_size):
        self.classifier.add(Dense(256, activation='relu'))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(Dense(128, activation='relu'))
        self.classifier.add(Dropout(0.25))
        self.classifier.add(Dense(output_size, activation='sigmoid'))

    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        p_valid = classifier.predict(X_valid)
        return fbeta_score(y_valid,
                           np.array(p_valid) > 0.2,
                           beta=2,
                           average='samples')

    def train_model(self,
                    x_train,
                    y_train,
                    epoch=5,
                    batch_size=128,
                    validation_split_size=0.2,
                    train_callbacks=()):
        history = LossHistory()

        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)
        adam = Adam(lr=0.01, decay=1e-6)
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=adam,
                                metrics=['accuracy'])

        print('X_train.shape[0]:', X_train.shape[0])

        checkpointer = ModelCheckpoint(filepath="weights.best.hdf5",
                                       verbose=1,
                                       save_best_only=True)
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by dataset std
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=False,  # apply ZCA whitening
            rotation_range=0,  # randomly rotate images (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift horizontally (fraction of width)
            height_shift_range=0.1,  # randomly shift vertically (fraction of height)
            horizontal_flip=True,  # randomly flip images horizontally
            vertical_flip=False)  # do not flip vertically

        datagen.fit(X_train)

        self.classifier.fit_generator(
            datagen.flow(X_train, y_train, batch_size=batch_size),
            steps_per_epoch=X_train.shape[0] // batch_size,
            epochs=epoch,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, checkpointer])

        fbeta_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        print(fbeta_score)
        return [history.train_losses, history.val_losses, fbeta_score]

    def load_weight(self):
        self.classifier.load_weights("weights.best.hdf5")

    def predict(self, x_test):
        predictions = self.classifier.predict(x_test)
        return predictions

    def map_predictions(self, predictions, labels_map, thresholds):
        """
        Return the predictions mapped to their labels
        :param predictions: the predictions from the predict() method
        :param labels_map: the map from class indices to label names
        :param thresholds: The threshold of each class to be considered as existing or not existing
        :return: the predictions list mapped to their labels
        """
        predictions_labels = []
        for prediction in predictions:
            labels = [
                labels_map[i] for i, value in enumerate(prediction)
                if value > thresholds[i]
            ]
            predictions_labels.append(labels)

        return predictions_labels

    def close(self):
        backend.clear_session()
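
A minimal usage sketch, assuming x_train, y_train and x_test are NumPy arrays
and labels_map maps class indices to tag names (names illustrative):

classifier = AmazonKerasClassifier()
classifier.add_conv_layer(img_size=(32, 32))
classifier.add_flatten_layer()
classifier.add_ann_layer(output_size=17)
classifier.train_model(x_train, y_train, epoch=5)
classifier.load_weight()  # restore the checkpoint saved by ModelCheckpoint
preds = classifier.predict(x_test)
labels = classifier.map_predictions(preds, labels_map, thresholds=[0.2] * 17)
classifier.close()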
Example #6
class Model(object):
    def __init__(self):
        self.model = Sequential()
        self.model.add(
            Conv2D(32, (3, 3), input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(Conv2D(32, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(Conv2D(64, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 2)))

        self.model.add(Flatten())
        self.model.add(Dense(64))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(1))
        self.model.add(Activation('sigmoid'))

    def train(self, dataset, batch_size=batch_size, nb_epoch=epochs):

        self.model.compile(loss='binary_crossentropy',
                           optimizer='adam',
                           metrics=['accuracy'])
        self.model.fit_generator(
            dataset.train,
            steps_per_epoch=nb_train_samples // batch_size,
            epochs=nb_epoch,
            validation_data=dataset.valid,
            validation_steps=nb_validation_samples // batch_size)

    def save(self, file_path=FILE_PATH):
        print('Model Saved.')
        self.model.save_weights(file_path)

    def load(self, file_path=FILE_PATH):
        print('Model Loaded.')
        self.model.load_weights(file_path)

    def predict(self, image):
        # Predict the class of a single sample
        img = image.reshape((1, IMAGE_SIZE, IMAGE_SIZE, 3))
        img = img.astype('float32')
        img /= 255  # normalize to [0, 1]

        result = self.model.predict(img)
        print(result)  # class probability

        result = self.model.predict_classes(img)
        print(result)  # 0/1 label

        return result[0]

    def evaluate(self, dataset):
        # Report accuracy on the validation samples
        score = self.model.evaluate_generator(dataset.valid, steps=2)
        print("Validation %s: %.2f%%" %
              (self.model.metrics_names[1], score[1] * 100))
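
A minimal usage sketch, assuming a dataset object exposing train/valid
generators and the module-level constants this class references (IMAGE_SIZE,
batch_size, epochs, FILE_PATH, nb_train_samples, nb_validation_samples):

model = Model()
model.train(dataset)
model.evaluate(dataset)
model.save()  # writes the weights to FILE_PATH
label = model.predict(single_image)  # single_image: HxWx3 array; returns 0 or 1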
Example #7
# Importing my training set images (this call is reconstructed; the excerpt
# begins mid-argument list) :
training_set = train_datagen.flow_from_directory('dataset/training_set',
                                                 target_size=input_size,
                                                 batch_size=batch_size,
                                                 class_mode='binary')
#Importing my test set images :     
test_set = test_datagen.flow_from_directory('dataset/test_set',
                                            target_size=input_size,
                                            batch_size=batch_size,
                                            class_mode='binary')
     
# Creating a loss history class object :
history = LossHistory()

# Fitting the model i.e. training it :     
classifier.fit_generator(training_set,
                         steps_per_epoch=8000 // batch_size,  # Number of batches to complete before an epoch is declared finished.
                         epochs=90,
                         validation_data=test_set,
                         validation_steps=2000 // batch_size,
                         workers=12,  # adjusted workers and max_queue_size for my personal GPU performance.
                         max_queue_size=100,
                         callbacks=[history])  # recording training stats into the history object.
     


# PART 3 - MAKING PREDICTIONS, SAVING MODEL, SAVING LOSS HISTORY TO FILE.

# Saving model :
model_path = 'dataset/cat_or_dog_model.h5'
classifier.save(model_path)
print("Model saved to", model_path)
     
# Saving loss history to file : 
lossLog_path = 'dataset/loss_history.log'
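
The excerpt ends here; a minimal sketch of the step the comment announces,
assuming the LossHistory callback stores its recorded values in a losses list
(the attribute name is an assumption):

with open(lossLog_path, 'w') as lossLog:
    for loss in history.losses:  # per-batch losses (assumed attribute)
        lossLog.write('{}\n'.format(loss))
print("Loss history saved to", lossLog_path)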
Example #8
class AmazonKerasClassifier:
    def __init__(self):
        self.losses = []
        self.classifier = Sequential()

    def add_conv_layer(self, img_size=(32, 32), img_channels=3):
        self.classifier.add(
            BatchNormalization(input_shape=(*img_size, img_channels)))

        self.classifier.add(
            Conv2D(32, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(32, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))

        self.classifier.add(
            Conv2D(64, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(64, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))

        self.classifier.add(
            Conv2D(128, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(128, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))

        self.classifier.add(
            Conv2D(256, (3, 3), padding='same', activation='relu'))
        self.classifier.add(Conv2D(256, (3, 3), activation='relu'))
        self.classifier.add(MaxPooling2D(pool_size=2))
        self.classifier.add(Dropout(0.25))

    def add_flatten_layer(self):
        self.classifier.add(Flatten())

    def add_ann_layer(self, output_size):
        self.classifier.add(Dense(512, activation='relu'))
        self.classifier.add(BatchNormalization())
        self.classifier.add(Dropout(0.5))
        self.classifier.add(Dense(output_size, activation='sigmoid'))

    def _get_fbeta_score(self, classifier, X_valid, y_valid):
        p_valid = classifier.predict(X_valid)
        return fbeta_score(y_valid,
                           np.array(p_valid) > 0.2,
                           beta=2,
                           average='samples')

    def train_model(self,
                    x_train,
                    y_train,
                    learn_rate=0.001,
                    epoch=5,
                    batch_size=128,
                    validation_split_size=0.2,
                    train_callbacks=()):
        history = LossHistory()

        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)

        opt = Adam(lr=learn_rate)

        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])

        # Early stopping halts training if val_loss fails to improve for 3 consecutive epochs
        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')

        self.classifier.fit(
            X_train,
            y_train,
            batch_size=batch_size,
            epochs=epoch,
            verbose=1,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, earlyStopping])
        fbeta_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        return [history.train_losses, history.val_losses, fbeta_score]

    def train_model_aug(self,
                        x_train,
                        y_train,
                        learn_rate=0.001,
                        epoch=5,
                        batch_size=128,
                        validation_split_size=0.15,
                        train_callbacks=()):
        history = LossHistory()

        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)

        opt = Adam(lr=learn_rate)

        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=opt,
                                metrics=['accuracy'])

        # Early stopping halts training if val_loss fails to improve for 3 consecutive epochs
        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=3,
                                      verbose=0,
                                      mode='auto')

        datagen = ImageDataGenerator(rotation_range=10,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     zoom_range=0.1,
                                     horizontal_flip=True)

        datagen.fit(X_train)

        self.classifier.fit_generator(
            datagen.flow(X_train, y_train, batch_size=batch_size),
            steps_per_epoch=len(X_train) // batch_size,
            epochs=epoch,
            verbose=1,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, earlyStopping])

        fbeta_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        return [history.train_losses, history.val_losses, fbeta_score]

    def save_weights(self, weight_file_path):
        self.classifier.save_weights(weight_file_path)

    def load_weights(self, weight_file_path):
        self.classifier.load_weights(weight_file_path)

    def predict(self, x_test):
        predictions = self.classifier.predict(x_test)
        return predictions

    def map_predictions(self, predictions, labels_map, thresholds):
        """
        Return the predictions mapped to their labels
        :param predictions: the predictions from the predict() method
        :param labels_map: the map from class indices to label names
        :param thresholds: The threshold of each class to be considered as existing or not existing
        :return: the predictions list mapped to their labels
        """
        predictions_labels = []
        for prediction in predictions:
            labels = [
                labels_map[i] for i, value in enumerate(prediction)
                if value > thresholds[i]
            ]
            predictions_labels.append(labels)

        return predictions_labels

    def close(self):
        backend.clear_session()
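
A minimal sketch contrasting the two training paths above, assuming x_train
and y_train are NumPy arrays (names illustrative):

classifier = AmazonKerasClassifier()
classifier.add_conv_layer(img_size=(32, 32))
classifier.add_flatten_layer()
classifier.add_ann_layer(output_size=17)

# Plain training on the raw arrays
classifier.train_model(x_train, y_train, learn_rate=0.001, epoch=5)

# Or: the same model trained with on-the-fly augmentation
classifier.train_model_aug(x_train, y_train, learn_rate=0.001, epoch=5)

classifier.save_weights('amazon_weights.h5')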