Example #1
    def train_model(self,
                    x_train,
                    y_train,
                    epoch=5,
                    batch_size=128,
                    validation_split_size=0.2,
                    train_callbacks=()):
        history = LossHistory()

        X_train, X_valid, y_train, y_valid = train_test_split(
            x_train, y_train, test_size=validation_split_size)
        adam = Adam(lr=0.01, decay=1e-6)
        rms = RMSprop(lr=0.0001, decay=1e-6)  # alternative optimizer (unused here)
        self.classifier.compile(loss='binary_crossentropy',
                                optimizer=adam,
                                metrics=['accuracy'])

        print('X_train.shape[0]: {}'.format(X_train.shape[0]))

        checkpointer = ModelCheckpoint(filepath="weights.best.hdf5",
                                       verbose=1,
                                       save_best_only=True)
        datagen = ImageDataGenerator(
            featurewise_center=False,             # set input mean to 0 over the dataset
            samplewise_center=False,              # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,   # divide each input by its std
            zca_whitening=False,                  # apply ZCA whitening
            rotation_range=0,                     # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,                # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,               # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,                 # randomly flip images horizontally
            vertical_flip=False)                  # do not flip images vertically

        datagen.fit(X_train)

        self.classifier.fit_generator(
            datagen.flow(X_train, y_train, batch_size=batch_size),
            steps_per_epoch=X_train.shape[0] // batch_size,
            epochs=epoch,
            validation_data=(X_valid, y_valid),
            callbacks=[history, *train_callbacks, checkpointer])

        fbeta_score = self._get_fbeta_score(self.classifier, X_valid, y_valid)
        print(fbeta_score)
        return [history.train_losses, history.val_losses, fbeta_score]
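The `LossHistory` callback used by `train_model` is not defined in this snippet; a minimal sketch of what it presumably does, assuming it only records the per-epoch training and validation losses:

from keras.callbacks import Callback


class LossHistory(Callback):
    """Record per-epoch training and validation loss (assumed implementation)."""

    def __init__(self):
        super(LossHistory, self).__init__()
        self.train_losses = []
        self.val_losses = []

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.train_losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))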
Example #2
def get_model():
    """
    get model
    """

    checkpoint = ModelCheckpoint(MODEL_NAME,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)

    model = Sequential()
    input_shape = (IMAGE_SIZE, IMAGE_SIZE, 3)

    model.add(BatchNormalization(input_shape=input_shape))

    model.add(Conv2D(32, (3, 3), padding='same', activation='relu'))
    model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.25))

    model.add(Conv2D(128, (3, 3), padding='same', activation='relu'))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.25))

    model.add(Conv2D(256, (3, 3), padding='same', activation='relu'))
    model.add(Conv2D(256, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.25))

    model.add(Flatten())

    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(N_CLASSES, activation='sigmoid'))

    return model
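Note that the `checkpoint` created inside `get_model` is neither used nor returned. A usage sketch (not from the original source) showing how the returned model might be compiled and trained; `x_train`, `y_train`, `MODEL_NAME`, `IMAGE_SIZE`, and `N_CLASSES` are assumed to be defined elsewhere in the script:

from keras.callbacks import ModelCheckpoint

model = get_model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Re-create the checkpoint here because the one built inside get_model() is discarded.
checkpoint = ModelCheckpoint(MODEL_NAME, monitor='val_acc', verbose=1,
                             save_best_only=True)
model.fit(x_train, y_train,
          batch_size=128,
          epochs=10,
          validation_split=0.2,
          callbacks=[checkpoint])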
Example #3
for weather in ['clear', 'cloudy', 'haze', 'partly_cloudy']:
    classifier = build_classifier(13)
    weather_ind = labels_map[weather]
    weather_subset = np.where(y_train[:, weather_ind] == 1)[0]
    x_current = x_train[weather_subset]
    y_current = y_train[weather_subset][:, non_weather_labels]
    if x_current.shape[0] < 5000:
        # Augment small subsets with 90/180/270-degree rotations of the images
        x_current = np.vstack((x_current,
                               np.rot90(x_current, k=1, axes=(1, 2)),
                               np.rot90(x_current, k=2, axes=(1, 2)),
                               np.rot90(x_current, k=3, axes=(1, 2))))
        y_current = np.vstack((y_current, y_current, y_current, y_current))
    filepath = "%s_weights.best.hdf5" % weather
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)
    train_losses, val_losses = [], []
    epochs_arr = [10, 5, 5]
    learn_rates = [0.001, 0.0001, 0.00001]
    for learn_rate, epochs in zip(learn_rates, epochs_arr):
        tmp_train_losses, tmp_val_losses, fbeta_score = classifier.train_model(
            x_current,
            y_current,
            learn_rate,
            epochs,
            batch_size,
            validation_split_size=validation_split_size,
            train_callbacks=[checkpoint])
        print(fbeta_score)
        train_losses += tmp_train_losses
        val_losses += tmp_val_losses
Example #4
def train_autoencoder(data,
                      codec,
                      batch_size=1000,
                      epochs=1000,
                      saveFilePrefix=None,
                      use_tanh=True,
                      train_imagenet=False,
                      aux_num=0):
    """Train autoencoder

    python3 train_CAE.py -d mnist --compress_mode 1 --epochs 10000 --save_prefix mnist

    """

    ckptFileName = saveFilePrefix + "ckpt"

    encoder_model_filename = saveFilePrefix + "encoder.json"
    decoder_model_filename = saveFilePrefix + "decoder.json"
    encoder_weight_filename = saveFilePrefix + "encoder.h5"
    decoder_weight_filename = saveFilePrefix + "decoder.h5"

    if os.path.exists(decoder_weight_filename):
        print("Load the pre-trained model.")
        codec.decoder.load_weights(decoder_weight_filename)
    elif os.path.exists(ckptFileName):
        print("Load the previous checkpoint")
        codec.decoder.load_weights(ckptFileName)

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    codec.decoder.compile(loss='mse', optimizer=sgd)

    checkpointer = ModelCheckpoint(filepath=ckptFileName,
                                   verbose=1,
                                   save_best_only=True)

    if train_imagenet:
        # train using Keras imageGenerator
        print("Training using imageGenerator:")
        codec.decoder.fit_generator(
            data.train_generator_flow,
            steps_per_epoch=100,
            epochs=epochs,
            validation_data=data.validation_generator_flow,
            validation_steps=100,
            callbacks=[checkpointer])
    else:
        # in-memory training
        x_train = data.validation_data
        # add zeros for correction
        if aux_num > 0:
            x_train = np.concatenate(
                (x_train, np.zeros((aux_num, ) + x_train.shape[1:])), axis=0)
            x_train = np.concatenate(
                (x_train, 0.5 * np.ones((aux_num, ) + x_train.shape[1:])),
                axis=0)
            x_train = np.concatenate(
                (x_train, -0.5 * np.ones((aux_num, ) + x_train.shape[1:])),
                axis=0)

        y_train = x_train
        x_test = data.test_data
        y_test = x_test
        print("In-memory training:")
        print("Shape of training data:{}".format(x_train.shape))
        print("Shape of testing data:{}".format(x_test.shape))

        codec.decoder.fit(x_train,
                          y_train,
                          batch_size=batch_size,
                          validation_data=(x_test, y_test),
                          epochs=epochs,
                          shuffle=True,
                          callbacks=[checkpointer])

    print("Checkpoint is saved to {}\n".format(ckptFileName))

    model_json = codec.encoder.to_json()
    with open(encoder_model_filename, "w") as json_file:
        json_file.write(model_json)
    print(
        "Encoder specification is saved to {}".format(encoder_model_filename))

    codec.encoder.save_weights(encoder_weight_filename)
    print("Encoder weight is saved to {}\n".format(encoder_weight_filename))

    model_json = codec.decoder.to_json()
    with open(decoder_model_filename, "w") as json_file:
        json_file.write(model_json)

    print(
        "Decoder specification is saved to {}".format(decoder_model_filename))

    codec.decoder.save_weights(decoder_weight_filename)
    print("Decoder weight is saved to {}\n".format(decoder_weight_filename))
Example #5
def main():
    img_resize = (64, 64)
    validation_split_size = 0.2
    batch_size = 128
    imageInfo = pd.read_csv('../data/train_v2.csv/train_v2.csv')

    labels_list = list(
        chain.from_iterable(
            [tags.split(" ") for tags in imageInfo['tags'].values]))
    labels_set = set(labels_list)
    print("总共有{}个标签,分别是{}".format(len(labels_set), labels_set))

    x_train, y_train, y_map = DataPreprocess.preprocess_train_data(
        '../data/train-jpg', '../data/train_v2.csv/train_v2.csv', img_resize)
    # Free up all available memory space after this heavy operation
    gc.collect()
    print("x_train shape: {}".format(x_train.shape))
    print("y_train shape: {}".format(y_train.shape))

    filepath = "weights.best.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)

    classifier = AmazonKerasClassifier()
    classifier.add_conv_layer(img_resize)
    classifier.add_flatten_layer()
    classifier.add_ann_layer(len(y_map))
    #classifier.vgg(16,img_size=img_resize,img_channels=3,output_size=len(y_map))
    #classifier.squeezenet(img_size=img_resize,img_channels=3,output_size=len(y_map))
    #classifier.resnet(1,img_size=img_resize,img_channels=3,output_size=len(y_map))
    #classifier.densenet(121,img_size=img_resize,img_channels=3,output_size=len(y_map))
    classifier.alexnet(img_size=img_resize,
                       img_channels=3,
                       output_size=len(y_map))

    # Train the model
    train_losses, val_losses = [], []
    epochs_arr = [1]  #[20, 5, 5]
    learn_rates = [0.001]  #[0.001, 0.0001, 0.00001]
    for learn_rate, epochs in zip(learn_rates, epochs_arr):
        tmp_train_losses, tmp_val_losses, fbeta_score = classifier.train_model(
            x_train,
            y_train,
            learn_rate,
            epochs,
            batch_size,
            validation_split_size=validation_split_size,
            train_callbacks=[checkpoint])
        train_losses += tmp_train_losses
        val_losses += tmp_val_losses

    # Load the best weights saved during training
    classifier.load_weights("weights.best.hdf5")
    print("Weights loaded")
    print(fbeta_score)

    result_threshold_list_final = classifier.setBestThreshold()

    del x_train, y_train
    gc.collect()
    # Predict
    x_test, x_test_filename = DataPreprocess.preprocess_test_data(
        '../data/test-jpg', img_resize)
    # Predict the labels of our x_test images
    predictions = classifier.predict(x_test)

    del x_test
    gc.collect()

    x_test, x_test_filename_additional = DataPreprocess.preprocess_test_data(
        '../data/test-jpg-additional', img_resize)
    new_predictions = classifier.predict(x_test)

    del x_test
    gc.collect()
    predictions = np.vstack((predictions, new_predictions))
    x_test_filename = np.hstack((x_test_filename, x_test_filename_additional))
    print(
        "Predictions shape: {}\nFiles name shape: {}\n1st predictions entry:\n{}"
        .format(predictions.shape, x_test_filename.shape, predictions[0]))

    thresholds = [0.2] * len(labels_set)
    predicted_labels = classifier.map_predictions(predictions, y_map,
                                                  thresholds)

    tags_list = [None] * len(predicted_labels)
    for i, tags in enumerate(predicted_labels):
        tags_list[i] = ' '.join(map(str, tags))

    print("tags_list:".format(tags_list))
    print('x_test_filename'.format(x_test_filename))
    print(':')
    print(tags_list)
    print(':')
    print(x_test_filename)

    final_data = [[filename.split(".")[0], tags]
                  for filename, tags in zip(x_test_filename, tags_list)]
    final_df = pd.DataFrame(final_data, columns=['image_name', 'tags'])
    final_df.to_csv('./submission_file.csv', index=False)
    classifier.close()
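`classifier.map_predictions` is not shown in these examples. A plausible minimal implementation (an assumption based on how it is called: a probability matrix, a column-index-to-tag map, and per-label thresholds):

import numpy as np


def map_predictions(predictions, y_map, thresholds):
    """Turn an (n_samples, n_labels) probability matrix into lists of tag names.

    Hypothetical helper: y_map is assumed to map column index -> tag name, and
    thresholds gives one cutoff per label.
    """
    predictions = np.asarray(predictions)
    thresholds = np.asarray(thresholds)
    predicted_labels = []
    for row in predictions:
        tags = [y_map[i] for i, prob in enumerate(row) if prob >= thresholds[i]]
        predicted_labels.append(tags)
    return predicted_labels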
Example #6
def main2():
    img_resize = (64, 64)
    validation_split_size = 0.2
    batch_size = 128
    imageInfo = pd.read_csv('../data/train_v2.csv/train_v2.csv')

    labels_list = list(
        chain.from_iterable(
            [tags.split(" ") for tags in imageInfo['tags'].values]))
    labels_set = set(labels_list)
    print("总共有{}个标签,分别是{}".format(len(labels_set), labels_set))

    filepath = "weights.best.hdf5"
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)

    classifier = AmazonKerasClassifier()
    classifier.setTrainFilePath('../data/train_v2.csv/train_v2.csv')
    classifier.setValidFilePath('../data/valid-v2.csv/valid_v2.csv')
    classifier.setTrainImgFilePath('../data/train-jpg')
    classifier.setValidImgFilePath('../data/valid-jpg')
    classifier.setTestImgFilePath('../data/test-jpg')
    classifier.setTestAdditionImgFilePath('../data/test-jpg-additional')
    #     classifier.add_conv_layer(img_resize)
    #     classifier.add_flatten_layer()
    #     classifier.add_ann_layer(len(y_map))
    #classifier.vgg(16,img_size=img_resize,img_channels=3,output_size=len(y_map))
    #classifier.squeezenet(img_size=img_resize,img_channels=3,output_size=len(y_map))
    #classifier.resnet(1,img_size=img_resize,img_channels=3,output_size=len(y_map))
    #classifier.densenet(121,img_size=img_resize,img_channels=3,output_size=len(y_map))
    classifier.alexnet(img_size=img_resize,
                       img_channels=3,
                       output_size=len(labels_set))

    # Train the model
    train_losses, val_losses = [], []
    epochs_arr = [1]  #[20, 5, 5]
    learn_rates = [0.001]  #[0.001, 0.0001, 0.00001]
    for learn_rate, epochs in zip(learn_rates, epochs_arr):
        tmp_train_losses, tmp_val_losses, fbeta_score = classifier.train_model_generator(
            classifier.generate_trainOrValid_img_from_file(
                classifier.getTrainImgFilePath(),
                classifier.getTrainFilePath(),
                img_resize=img_resize),
            classifier.generate_trainOrValid_img_from_file(
                classifier.getValidImgFilePath(),
                classifier.getValidFilePath(),
                img_resize=img_resize),
            learn_rate,
            epochs,
            steps=32383,
            validation_steps=8096,
            train_callbacks=[checkpoint])
        train_losses += tmp_train_losses
        val_losses += tmp_val_losses

    y_map = classifier.getYMap()
    # Load the best weights saved during training
    classifier.load_weights("weights.best.hdf5")
    print("Weights loaded")
    print(fbeta_score)

    result_threshold_list_final = classifier.setBestThreshold()

    gc.collect()
    # Predict
    predictions = classifier.predict_generator(
        classifier.generate_test_img_from_file(classifier.getTestImgFilePath(),
                                               img_resize=img_resize), 40669)
    x_test_filename = classifier.getTestImgNameList()
    gc.collect()

    # Predict on the additional test set (assumes a getTestAdditionImgFilePath()
    # getter matching the setter used above).
    new_predictions = classifier.predict_generator(
        classifier.generate_test_img_from_file(
            classifier.getTestAdditionImgFilePath(), img_resize=img_resize),
        20522)
    x_test_filename_additional = classifier.getTestImgNameList()
    gc.collect()
    predictions = np.vstack((predictions, new_predictions))
    x_test_filename = np.hstack((x_test_filename, x_test_filename_additional))
    print(
        "Predictions shape: {}\nFiles name shape: {}\n1st predictions entry:\n{}"
        .format(predictions.shape, x_test_filename.shape, predictions[0]))

    thresholds = [0.2] * len(labels_set)
    predicted_labels = classifier.map_predictions(predictions, y_map,
                                                  thresholds)

    tags_list = [None] * len(predicted_labels)
    for i, tags in enumerate(predicted_labels):
        tags_list[i] = ' '.join(map(str, tags))

    print("tags_list:".format(tags_list))
    print('x_test_filename'.format(x_test_filename))
    print(':')
    print(tags_list)
    print(':')
    print(x_test_filename)

    final_data = [[filename.split(".")[0], tags]
                  for filename, tags in zip(x_test_filename, tags_list)]
    final_df = pd.DataFrame(final_data, columns=['image_name', 'tags'])
    final_df.to_csv('./submission_file.csv', index=False)
    classifier.close()
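The `fbeta_score` value returned by the training helpers is never computed in these snippets. A hedged sketch of a `_get_fbeta_score`-style helper, assuming the F2 metric typical for this multi-label tagging task and a fixed 0.2 threshold:

import numpy as np
from sklearn.metrics import fbeta_score


def get_fbeta_score(model, X_valid, y_valid, threshold=0.2):
    """F2 score over the validation set (assumed implementation)."""
    p_valid = model.predict(X_valid)
    return fbeta_score(y_valid, np.asarray(p_valid) > threshold,
                       beta=2, average='samples')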
Example #7
# <codecell>
# <markdowncell>
# ## Define and Train model
# Here we define the model and begin training.
# Note that we have created a learning rate annealing schedule: a series of learning rates defined in the array `learn_rates`, each trained for the corresponding number of epochs in `epochs_arr`. Feel free to change these values or just use the defaults.
classifier = AmazonKerasClassifier()
#classifier.load_model(model_filepath+".json")       # load model
classifier.add_conv_layer(img_resize, img_channels)
classifier.add_flatten_layer()
classifier.add_ann_layer(len(y_map))
classifier.summary()
classifier.save_model(model_filepath + ".json")

checkpoint = ModelCheckpoint(model_filepath + ".hdf5",
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)

train_losses, val_losses = [], []
epochs_arr = [15, 7, 7]
learn_rates = [0.001, 0.0001, 0.00001]
for learn_rate, epochs in zip(learn_rates, epochs_arr):
    tmp_train_losses, tmp_val_losses, fbeta_sc = classifier.train_model(
        x_train,
        y_train,
        x_valid,
        y_valid,
        learn_rate,
        epochs,
        batch_size,
        w_sam_map=w_sample,