def main():
    # Evaluation entry point: reads settings from ./config.ini, resolves the
    # number of test steps, and builds the model with the selected weights.
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    # class_names is stored as one comma-separated option
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    # kept as a string here: it may be the literal 'auto' (handled below)
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        # 'auto' derives the step count from the sample count (truncated)
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    # choose between the "best_"-prefixed weights file and the last one
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    print("** weight path is {} **".format(model_weights_path))
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path)

    print("** load test generator **")
    # NOTE(review): the lines below look truncated/garbled in this view
    # (keyword arguments dangling after an already-closed call) -- confirm
    # against the original file before relying on them.
    test_ids = PatientInfo('stage_1_test_images/', train=True)
        train=False, 
        img_dir='stage_1_test_images/*' #image_source_dir+'*', #'stage_1_test_images/*',
def load_ori_model(config_file="./config.ini"):
    """Build the model described by ``config_file`` with its weights loaded.

    The DEFAULT, TRAIN and TEST sections are read, ``test_steps`` is resolved
    and printed, and the weights file is chosen from the TEST option
    ``use_best_weights`` (the ``best_``-prefixed file when truthy, otherwise
    the plain ``output_weights_name`` file).

    Args:
        config_file: path of the INI file to read.

    Returns:
        The object returned by ``ModelFactory().get_model(...)``.

    Raises:
        ValueError: if ``test_steps`` is neither ``'auto'`` nor an integer.
    """
    parser = ConfigParser()
    parser.read(config_file)

    # DEFAULT section
    default_cfg = parser["DEFAULT"]
    out_dir = default_cfg.get("output_dir")
    backbone = default_cfg.get("base_model_name")
    labels = default_cfg.get("class_names").split(",")
    image_source_dir = default_cfg.get("image_source_dir")  # read, not used below

    # TRAIN section
    train_cfg = parser["TRAIN"]
    image_dimension = train_cfg.getint("image_dimension")  # read, not used below

    # TEST section
    test_cfg = parser["TEST"]
    batch = test_cfg.getint("batch_size")
    steps = test_cfg.get("test_steps")
    prefer_best = test_cfg.getboolean("use_best_weights")

    # candidate weights files
    weights_file = train_cfg.get("output_weights_name")
    last_path = os.path.join(out_dir, weights_file)
    best_path = os.path.join(out_dir, f"best_{weights_file}")

    # number of test samples
    sample_total, _ = get_sample_counts(out_dir, "test", labels)

    # resolve the step count: explicit integer, or derived when 'auto'
    if steps != "auto":
        try:
            steps = int(steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {steps} is invalid,
                please use 'auto' or integer.
                """)
    else:
        steps = int(sample_total / batch)
    print(f"** test_steps: {steps} **")

    print("** load model **")
    print("** use best weights **" if prefer_best else "** use last weights **")
    chosen_path = best_path if prefer_best else last_path

    # load CheXNet model:
    factory = ModelFactory()
    return factory.get_model(labels,
                             model_name=backbone,
                             use_base_weights=False,
                             weights_path=chosen_path)
Пример #3
0
def load_model():
    """Create the configured model with its trained weights loaded.

    Settings come from ``./config.ini``: model name and class names from
    DEFAULT, the weights file name from TRAIN, and the best/last weights
    switch from CAM.

    Returns:
        tuple: ``(model, class_names)`` where ``class_names`` is the list
        obtained by splitting the DEFAULT ``class_names`` option on commas.
    """
    parser = ConfigParser()
    parser.read("./config.ini")

    # DEFAULT / TRAIN options
    default_cfg = parser["DEFAULT"]
    out_dir = default_cfg.get("output_dir")
    backbone = default_cfg.get("base_model_name")
    class_names = default_cfg.get("class_names").split(",")
    image_source_dir = default_cfg.get("image_source_dir")  # read, not used below
    train_cfg = parser["TRAIN"]
    image_dimension = train_cfg.getint("image_dimension")  # read, not used below

    # candidate weights files
    weights_file = train_cfg.get("output_weights_name")
    last_path = os.path.join(out_dir, weights_file)
    best_path = os.path.join(out_dir, f"best_{weights_file}")

    # CAM options
    cam_cfg = parser["CAM"]
    bbox_list_file = cam_cfg.get("bbox_list_file")  # read, not used below
    prefer_best = cam_cfg.getboolean("use_best_weights")

    print("** load model **")
    print("** use best weights **" if prefer_best else "** use last weights **")
    chosen_path = best_path if prefer_best else last_path

    network = ModelFactory().get_model(class_names,
                                       model_name=backbone,
                                       use_base_weights=False,
                                       weights_path=chosen_path)
    return network, class_names
Пример #4
0
def main():
    """Create class activation map images for every row of the bbox list.

    Reads ``./config.ini``, loads the model with the weights selected by the
    CAM option ``use_best_weights``, reads the bbox CSV (first row skipped),
    builds a one-image-per-batch sequence over ``test.csv`` and calls
    ``create_cam`` for each bbox row, writing into ``<output_dir>/cam``.
    """
    parser = ConfigParser()
    parser.read("./config.ini")

    # DEFAULT / TRAIN options
    default_cfg = parser["DEFAULT"]
    out_dir = default_cfg.get("output_dir")
    backbone = default_cfg.get("base_model_name")
    labels = default_cfg.get("class_names").split(",")
    image_root = default_cfg.get("image_source_dir")
    train_cfg = parser["TRAIN"]
    side = train_cfg.getint("image_dimension")

    # candidate weights files
    weights_file = train_cfg.get("output_weights_name")
    last_path = os.path.join(out_dir, weights_file)
    best_path = os.path.join(out_dir, f"best_{weights_file}")

    # CAM options
    cam_cfg = parser["CAM"]
    bbox_csv = cam_cfg.get("bbox_list_file")
    prefer_best = cam_cfg.getboolean("use_best_weights")

    print("** load model **")
    print("** use best weights **" if prefer_best else "** use last weights **")
    chosen_path = best_path if prefer_best else last_path
    network = ModelFactory().get_model(labels,
                                       model_name=backbone,
                                       use_base_weights=False,
                                       weights_path=chosen_path)

    print("read bbox list file")
    bbox_rows = pd.read_csv(bbox_csv, header=None, skiprows=1)
    bbox_rows.columns = ["file_name", "label", "x", "y", "w", "h"]

    print("create a generator for loading transformed images")
    cam_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(out_dir, "test.csv"),
        class_names=labels,
        source_image_dir=image_root,
        batch_size=1,
        target_size=(side, side),
        augmenter=None,
        steps=1,
        shuffle_on_epoch_end=False,
    )

    cam_dir = os.path.join(out_dir, "cam")
    if not os.path.isdir(cam_dir):
        os.makedirs(cam_dir)

    def _render_cam(row):
        # one bbox row -> one CAM image
        return create_cam(
            df_g=row,
            output_dir=cam_dir,
            image_source_dir=image_root,
            model=network,
            generator=cam_sequence,
            class_names=labels,
        )

    print("create CAM")
    bbox_rows.apply(_render_cam, axis=1)
Пример #5
0
# Module-level service setup: builds the model once at import time and creates
# the Flask application object.
# NOTE(review): `cp` is not defined in the visible part of the file; presumably
# a ConfigParser populated earlier -- confirm.
# default config
output_dir = cp["DEFAULT"].get("output_dir")
base_model_name = cp["DEFAULT"].get("base_model_name")
# class_names is stored as one comma-separated option
class_names = cp["DEFAULT"].get("class_names").split(",")
image_source_dir = cp["DEFAULT"].get("image_source_dir")

# parse weights file path
output_weights_name = cp["TRAIN"].get("output_weights_name")

# output_weights_name is only printed; the path below uses a hard-coded
# file name instead.
print(output_weights_name)
best_weights_path = os.path.join(output_dir, "best_weights.h5")

print("** load model **")
model_weights_path = best_weights_path

model_factory = ModelFactory()
model = model_factory.get_model(class_names,
                                model_name=base_model_name,
                                use_base_weights=False,
                                weights_path=model_weights_path)

# NOTE(review): private Keras method; presumably called so the predict
# function is built before requests are served -- confirm.
model._make_predict_function()

app = Flask(__name__)


@app.route('/alive', methods=['GET'])
def alive():
    """Answer GET /alive with the fixed body 'Ok'."""
    status = 'Ok'
    return status

Пример #6
0
def main():
    """Evaluate the configured model and log per-class AUROC scores.

    Reads ``./config.ini``, resolves ``test_steps``, loads the model with the
    weights selected by the TEST option ``use_best_weights``, predicts over
    ``dev.csv`` and writes one ``<class>: <score>`` line per class plus the
    mean AUROC to ``<output_dir>/test.log``.

    Raises:
        ValueError: if ``test_steps`` is neither ``'auto'`` nor an integer.
    """
    parser = ConfigParser()
    parser.read("./config.ini")

    # DEFAULT section
    default_cfg = parser["DEFAULT"]
    out_dir = default_cfg.get("output_dir")
    backbone = default_cfg.get("base_model_name")
    labels = default_cfg.get("class_names").split(",")
    image_root = default_cfg.get("image_source_dir")

    # TRAIN section
    train_cfg = parser["TRAIN"]
    side = train_cfg.getint("image_dimension")

    # TEST section
    test_cfg = parser["TEST"]
    batch = test_cfg.getint("batch_size")
    steps = test_cfg.get("test_steps")
    prefer_best = test_cfg.getboolean("use_best_weights")

    # candidate weights files
    weights_file = train_cfg.get("output_weights_name")
    last_path = os.path.join(out_dir, weights_file)
    best_path = os.path.join(out_dir, "best_{}".format(weights_file))

    # number of test samples
    sample_total, _ = get_sample_counts(out_dir, "test", labels)

    # resolve the step count: explicit integer, or derived when 'auto'
    if steps != "auto":
        try:
            steps = int(steps)
        except ValueError:
            raise ValueError("""
                test_steps: {} is invalid,
                please use 'auto' or integer.
                """.format(steps))
    else:
        steps = int(sample_total / batch)
    print("** test_steps: {} **".format(steps))

    print("** load model **")
    print("** use best weights **" if prefer_best else "** use last weights **")
    chosen_path = best_path if prefer_best else last_path
    network = ModelFactory().get_model(labels,
                                       model_name=backbone,
                                       use_base_weights=False,
                                       weights_path=chosen_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(out_dir, "dev.csv"),
        class_names=labels,
        source_image_dir=image_root,
        batch_size=batch,
        target_size=(side, side),
        augmenter=None,
        steps=steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    predicted = network.predict_generator(test_sequence, verbose=1)
    truth = test_sequence.get_y_true()

    log_path = os.path.join(out_dir, "test.log")
    print("** write log to {} **".format(log_path))
    valid_scores = []
    with open(log_path, "w") as log:
        for col, label in enumerate(labels):
            try:
                score = roc_auc_score(truth[:, col], predicted[:, col])
            except ValueError:
                # logged as 0 and left out of the mean
                score = 0
            else:
                valid_scores.append(score)
            log.write("{}: {}\n".format(label, score))
        mean_auroc = np.mean(valid_scores)
        log.write("-------------------------\n")
        log.write("mean auroc: {}\n".format(mean_auroc))
        print("mean auroc: {}".format(mean_auroc))
Пример #7
0
from keras.utils import Sequence
from PIL import Image
from skimage.transform import resize

import cv2
from keras import backend as kb

# Module-level setup executed at import time: Flask app, label list, model
# with weights loaded, and a handle on the default TensorFlow graph.
app = Flask(__name__)

# Labels passed to the model factory; order is as listed here.
class_names = [
    'Atelectasis', 'Cardiomegaly', 'Effusion', 'Infiltration', 'Mass',
    'Nodule', 'Pneumonia', 'Pneumothorax', 'Consolidation', 'Edema',
    'Emphysema', 'Fibrosis', 'Pleural_Thickening', 'Hernia'
]

# Model name and weights path are hard-coded rather than read from a config.
model_factory = ModelFactory()
model = model_factory.get_model(class_names,
                                model_name='DenseNet121',
                                use_base_weights=False,
                                weights_path='models/best_weights.h5')
# NOTE(review): presumably kept so request handlers can run predictions
# against the graph the model was loaded into -- confirm against callers.
graph = tf.get_default_graph()


def load_image(image_file):
    """Open ``image_file``, convert it to RGB, divide by 255 and resize.

    Returns:
        The array produced by ``resize(..., (224, 224))`` on the scaled
        RGB pixel data.
    """
    rgb_pixels = np.asarray(Image.open(image_file).convert("RGB"))
    scaled = rgb_pixels / 255.
    return resize(scaled, (224, 224))
Пример #8
0
def _resolve_steps(raw_steps, sample_count, batch_size, option_name):
    """Turn a ``*_steps`` config value into an integer step count.

    ``'auto'`` yields ``int(sample_count / batch_size)``; anything else must
    parse as an integer, otherwise ``ValueError`` is raised naming the option.
    """
    if raw_steps == "auto":
        return int(sample_count / batch_size)
    try:
        return int(raw_steps)
    except ValueError as err:
        raise ValueError(f"""
                {option_name}: {raw_steps} is invalid,
                please use 'auto' or integer.
                """) from err


def train_rsna_clf(train_data=None, validation_data=None, remove_running=True):
    """Train the classifier described by ``./config.ini``.

    The function creates ``output_dir`` if needed, places a ``.training.lock``
    flag file in it for the duration of the run, backs up the config file,
    builds the model through ``ModelFactory`` and ``modify_last_layer``,
    trains it with ``fit_generator`` and pickles the training history and the
    AUROC values to ``history.pkl``. The flag file is removed when training
    ends, whether it succeeded or raised.

    Args:
        train_data: training dataset object; its ``.df`` attribute is passed
            to ``get_sample_counts`` and the object itself to
            ``AugmentedLabelSequence_clf``. The ``None`` default is not
            usable because ``.df`` is dereferenced.
        validation_data: validation dataset object, used the same way.
        remove_running: when True an existing flag file is replaced; when
            False its presence raises ``RuntimeError``.

    Raises:
        RuntimeError: a flag file exists and ``remove_running`` is False.
        ValueError: ``train_steps`` or ``validation_steps`` is neither
            ``'auto'`` nor an integer.
    """
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names1 = cp["DEFAULT"].get("class_names1").split(",")
    class_names2 = cp["DEFAULT"].get("class_names2").split(",")

    # train config
    train_cfg = cp["TRAIN"]
    use_base_model_weights = train_cfg.getboolean("use_base_model_weights")
    use_trained_model_weights = train_cfg.getboolean(
        "use_trained_model_weights")
    use_best_weights = train_cfg.getboolean("use_best_weights")
    input_weights_name = train_cfg.get("input_weights_name")
    output_weights_name = train_cfg.get("output_weights_name")
    epochs = train_cfg.getint("epochs")
    batch_size = train_cfg.getint("batch_size")
    initial_learning_rate = train_cfg.getfloat("initial_learning_rate")
    generator_workers = train_cfg.getint("generator_workers")
    image_dimension = train_cfg.getint("image_dimension")
    train_steps = train_cfg.get("train_steps")
    patience_reduce_lr = train_cfg.getint("patience_reduce_lr")
    min_lr = train_cfg.getfloat("min_lr")
    validation_steps = train_cfg.get("validation_steps")
    positive_weights_multiply = train_cfg.getfloat(
        "positive_weights_multiply")
    show_model_summary = train_cfg.getboolean("show_model_summary")

    # Resume previous training statistics only when continuing from trained
    # weights; otherwise start with empty stats.
    training_stats = {}
    if use_trained_model_weights:
        print("** use trained model weights **")
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            # context manager so the stats file handle is always closed
            with open(training_stats_file) as stats_fh:
                training_stats = json.load(stats_fh)
    # end parser config

    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Flag file marking this directory as in use by a training run.
    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        if not remove_running:
            raise RuntimeError("A process is running in this directory!!!")
        os.remove(running_flag_file)
    open(running_flag_file, "a").close()

    try:
        print(f"backup config file to {output_dir}")
        shutil.copy(config_file,
                    os.path.join(output_dir,
                                 os.path.split(config_file)[1]))

        # get train/dev sample counts
        train_counts, train_pos_counts = get_sample_counts(
            train_data.df, class_names2)
        validation_counts, _ = get_sample_counts(validation_data.df,
                                                 class_names2)

        # compute steps
        train_steps = _resolve_steps(train_steps, train_counts, batch_size,
                                     "train_steps")
        print(f"** train_steps: {train_steps} **")

        validation_steps = _resolve_steps(validation_steps, validation_counts,
                                          batch_size, "validation_steps")
        print(f"** validation_steps: {validation_steps} **")

        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("** class_weights **")
        print(class_weights)

        print("** load model **")
        if use_trained_model_weights:
            weights_name = (f"best_{input_weights_name}"
                            if use_best_weights else input_weights_name)
            model_weights_file = os.path.join(output_dir, weights_name)
        else:
            model_weights_file = None

        model_factory = ModelFactory()
        model = model_factory.get_model(
            class_names1,
            model_name=base_model_name,
            use_base_weights=use_base_model_weights,
            weights_path=model_weights_file,
            input_shape=(image_dimension, image_dimension, 3))
        # the model is created for class_names1, then its last layer is
        # replaced for class_names2
        model = modify_last_layer(model, class_names2)

        if show_model_summary:
            # summary() prints on its own; wrapping it in print() would also
            # emit a stray "None"
            model.summary()

        # NOTE(review): `augmenter` is not defined in this function;
        # presumably a module-level object -- confirm. It is applied to the
        # validation sequence as well as the training one.
        train_sq = AugmentedLabelSequence_clf(
            train_data,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=train_steps,
        )
        validation_sq = AugmentedLabelSequence_clf(
            validation_data,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=validation_steps,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print(f"** set output weights path to: {output_weights_path} **")

        print("** check multiple gpu availability **")
        # GPU count is inferred from the comma-separated CUDA_VISIBLE_DEVICES
        # value; an unset variable counts as one device.
        gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
        if gpus > 1:
            print(f"** multi_gpu_model is used! gpus={gpus} **")
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(
                output_weights_path,
                save_weights_only=True,
                save_best_only=True,
                verbose=1,
            )

        print("** compile model with class weights **")
        optimizer = Adam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss="binary_crossentropy")
        auroc = MultipleClassAUROC(
            sequence=validation_sq,
            class_names=class_names2,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"),
                        batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=patience_reduce_lr,
                              verbose=1,
                              mode="min",
                              min_lr=min_lr),
            auroc,
        ]

        print("** start training **")
        history = model_train.fit_generator(
            generator=train_sq,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=validation_sq,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=generator_workers,
            shuffle=False,
        )

        # dump history
        print("** dump history **")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")

    finally:
        # always release the flag file, even when training failed
        os.remove(running_flag_file)
Пример #9
0
def main(fold,gender_train,gender_test):
    """Predict on one fold's test split and save predictions and labels.

    The fold directory is ``<output_dir><gender_train>/Fold_<fold>``; the
    weights are read from its ``output/`` sub-directory and the test CSV is
    ``<gender_test>.csv`` inside the fold directory. Predictions and true
    labels are written as comma-delimited text into ``output/``.

    Args:
        fold: fold identifier, converted with ``str`` when building paths.
        gender_train: string naming the training subset (part of the paths).
        gender_test: string naming the test subset (CSV name and file suffix).

    Raises:
        ValueError: if ``test_steps`` is neither ``'auto'`` nor an integer.
    """
    parser = ConfigParser()
    parser.read('config_file.ini')

    default_cfg = parser["DEFAULT"]
    root_output_dir = default_cfg.get("output_dir")

    print(root_output_dir,gender_train)
    # fold directory and its output sub-directory (plain string concatenation)
    fold_dir = root_output_dir + gender_train + '/Fold_' + str(fold)
    output_dir = fold_dir + '/output/'

    backbone = default_cfg.get("base_model_name")
    labels = default_cfg.get("class_names").split(",")
    image_root = default_cfg.get("image_source_dir")

    # TRAIN section
    train_cfg = parser["TRAIN"]
    side = train_cfg.getint("image_dimension")

    # TEST section
    test_cfg = parser["TEST"]
    batch = test_cfg.getint("batch_size")
    steps = test_cfg.get("test_steps")
    prefer_best = test_cfg.getboolean("use_best_weights")

    # candidate weights files
    weights_file = train_cfg.get("output_weights_name")
    last_path = os.path.join(output_dir, weights_file)
    best_path = os.path.join(output_dir, f"best_{weights_file}")

    # number of test samples
    sample_total, _ = get_sample_counts(fold_dir, str(gender_test), labels)

    # resolve the step count: explicit integer, or derived when 'auto'
    if steps != "auto":
        try:
            steps = int(steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {steps} is invalid,
                please use 'auto' or integer.
                """)
    else:
        steps = int(sample_total / batch)
    print(f"** test_steps: {steps} **")

    print("** load model **")
    print("** use best weights **" if prefer_best else "** use last weights **")
    chosen_path = best_path if prefer_best else last_path
    network = ModelFactory().get_model(
        labels,
        model_name=backbone,
        use_base_weights=False,
        weights_path=chosen_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(fold_dir, str(gender_test) + ".csv"),
        class_names=labels,
        source_image_dir=image_root,
        batch_size=batch,
        target_size=(side, side),
        augmenter=None,
        steps=steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")

    predicted = network.predict_generator(test_sequence, verbose=1)
    truth = test_sequence.get_y_true()

    # shared file-name tail for the two result files
    run_tag = str(fold) + "_train" + gender_train + "_" + gender_test + ".csv"
    pred_file = output_dir + "y_pred_run_" + run_tag
    true_file = output_dir + "y_true_run_" + run_tag

    np.savetxt(pred_file, predicted, delimiter=",")
    np.savetxt(true_file, truth, delimiter=",")
def load_model(config_file="./config.ini", change_arch=False, compile_=True):
    """Load the configured model, optionally extending and compiling it.

    The model is built by ``ModelFactory`` with the weights selected by the
    TEST option ``use_best_weights``. ``test_steps`` is also resolved and
    printed, as in the evaluation scripts.

    Args:
        config_file: path of the INI file to read.
        change_arch: when True, a ``Dense(10, relu)`` layer is added on the
            second-to-last layer's output, concatenated with the original
            output layer and fed into a 3-way softmax ``rsna_clf_output``;
            the returned model maps the original input to that new output.
        compile_: when True, the returned model is compiled with the
            ``rmsprop`` optimizer.

    Returns:
        The (possibly extended, possibly compiled) Keras model.

    Raises:
        ValueError: if ``test_steps`` is neither ``'auto'`` nor an integer.
    """
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")

    # train config
    image_dimension = cp["TRAIN"].getint("image_dimension")

    # test config
    batch_size = cp["TEST"].getint("batch_size")
    test_steps = cp["TEST"].get("test_steps")
    use_best_weights = cp["TEST"].getboolean("use_best_weights")

    # parse weights file path
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = os.path.join(output_dir, f"best_{output_weights_name}")

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "test", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    # load CheXNet model:
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=base_model_name,
                                    use_base_weights=False,
                                    weights_path=model_weights_path)
    if change_arch:
        # input layer, output layer:
        input_layer = model.get_layer(index=0)
        chex_output = model.get_layer(index=-1)
        # add second last layer:
        intermediate_layer = model.get_layer(index=-2)
        rsna_add_layer = Dense(10, activation='relu', name='rsna_add_layer')(
            intermediate_layer.output)  # params are tentative
        rsna_clf_output = Dense(3,
                                activation='softmax',
                                name='rsna_clf_output')(concatenate(
                                    [rsna_add_layer, chex_output.output]))
        model = Model(inputs=[input_layer.input], outputs=[rsna_clf_output])
        losses = {'rsna_clf_output': 'categorical_crossentropy'}
    else:
        # Previously `losses` was never assigned on this path, so the default
        # call (change_arch=False, compile_=True) failed with NameError.
        # NOTE(review): binary cross-entropy is assumed here because the
        # unmodified model is presumably a multi-label classifier -- confirm.
        losses = 'binary_crossentropy'

    if compile_:
        print('** compile **')
        model.compile(optimizer='rmsprop', loss=losses, loss_weights=[1.])
    return model
Пример #11
0
def main():
    """Create class activation map images using the dataset CSV directory.

    Reads ``./config.ini``, creates ``output_dir`` when missing, resolves
    ``test_steps``, loads the model (best weights come from the dataset
    directory, last weights from ``output_dir``), and calls ``create_cam``
    for every row of the bbox list, writing into ``<output_dir>/cam``.

    Raises:
        ValueError: if ``test_steps`` is neither ``'auto'`` nor an integer.
    """
    parser = ConfigParser()
    parser.read("./config.ini")

    # DEFAULT section
    default_cfg = parser["DEFAULT"]
    out_dir = default_cfg.get("output_dir")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    backbone = default_cfg.get("base_model_name")
    labels = default_cfg.get("class_names").split(",")
    image_root = default_cfg.get("image_source_dir")

    # TRAIN / TEST options
    train_cfg = parser["TRAIN"]
    side = train_cfg.getint("image_dimension")
    test_cfg = parser["TEST"]
    batch = test_cfg.getint("batch_size")
    steps = test_cfg.get("test_steps")

    # candidate weights files: last weights live in out_dir, best weights in
    # the dataset CSV directory
    last_path = os.path.join(out_dir, train_cfg.get("output_weights_name"))
    csv_dir = train_cfg.get("dataset_csv_dir")
    input_weights = train_cfg.get("input_weights_name")
    best_path = os.path.join(csv_dir, f"best_{input_weights}")

    # number of test samples
    sample_total, _ = get_sample_counts(csv_dir, "test", labels)

    # resolve the step count: explicit integer, or derived when 'auto'
    if steps != "auto":
        try:
            steps = int(steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {steps} is invalid,
                please use 'auto' or integer.
                """)
    else:
        steps = int(sample_total / batch)

    # CAM options
    cam_cfg = parser["CAM"]
    bbox_csv = cam_cfg.get("bbox_list_file")
    prefer_best = cam_cfg.getboolean("use_best_weights")

    print("** load model **")
    print("** use best weights **" if prefer_best else "** use last weights **")
    chosen_path = best_path if prefer_best else last_path
    network = ModelFactory().get_model(labels,
                                       model_name=backbone,
                                       use_base_weights=False,
                                       weights_path=chosen_path)

    print("read bbox list file")
    bbox_rows = pd.read_csv(bbox_csv, header=None, skiprows=1)
    bbox_rows.columns = ["file_name", "label", "x", "y", "w", "h"]

    print("create a generator for loading transformed images")
    cam_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(csv_dir, "test.csv"),
        class_names=labels,
        source_image_dir=image_root,
        batch_size=batch,
        target_size=(side, side),
        augmenter=None,
        steps=steps,
        shuffle_on_epoch_end=False,
    )

    cam_dir = os.path.join(out_dir, "cam")
    if not os.path.isdir(cam_dir):
        os.makedirs(cam_dir)

    def _render_cam(row):
        # one bbox row -> one CAM image
        return create_cam(
            df_g=row,
            output_dir=cam_dir,
            image_source_dir=image_root,
            model=network,
            generator=cam_sequence,
            class_names=labels,
        )

    print("create CAM")
    bbox_rows.apply(_render_cam, axis=1)
Пример #12
0
def main():
    """Evaluate the configured model on the combined MIMIC test CSV.

    Reads ``./config.ini``, creates ``output_dir`` when missing, resolves
    ``test_steps``, loads the model with weights taken from the dataset CSV
    directory, predicts over ``MIMIC_data_test_1206_combined.csv`` and writes
    per-class AUROC scores plus their mean to ``<output_dir>/test.log``.

    Raises:
        ValueError: if ``test_steps`` is neither ``'auto'`` nor an integer.
    """
    parser = ConfigParser()
    parser.read("./config.ini")

    # DEFAULT section
    default_cfg = parser["DEFAULT"]
    out_dir = default_cfg.get("output_dir")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    backbone = default_cfg.get("base_model_name")
    labels = default_cfg.get("class_names").split(",")
    image_root = default_cfg.get("image_source_dir")

    # TRAIN section
    train_cfg = parser["TRAIN"]
    csv_dir = train_cfg.get("dataset_csv_dir")
    side = train_cfg.getint("image_dimension")

    # TEST section
    test_cfg = parser["TEST"]
    batch = test_cfg.getint("batch_size")
    steps = test_cfg.get("test_steps")
    prefer_best = test_cfg.getboolean("use_best_weights")

    # candidate weights files, both located in the dataset CSV directory
    input_weights = train_cfg.get("input_weights_name")
    last_path = os.path.join(csv_dir, input_weights)
    best_path = os.path.join(csv_dir, f"best_{input_weights}")

    # number of test samples
    sample_total, _ = get_sample_counts(csv_dir, "test", labels)

    # resolve the step count: explicit integer, or derived when 'auto'
    if steps != "auto":
        try:
            steps = int(steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {steps} is invalid,
                please use 'auto' or integer.
                """)
    else:
        steps = int(sample_total / batch)
    print(f"** test_steps: {steps} **")

    print("** load model **")
    print("** use best weights **" if prefer_best else "** use last weights **")
    chosen_path = best_path if prefer_best else last_path
    network = ModelFactory().get_model(
        labels,
        model_name=backbone,
        use_base_weights=False,
        weights_path=chosen_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(csv_dir, "MIMIC_data_test_1206_combined.csv"),
        class_names=labels,
        source_image_dir=image_root,
        batch_size=batch,
        target_size=(side, side),
        augmenter=None,
        steps=steps,
        shuffle_on_epoch_end=False,
    )

    print("** make prediction **")
    predicted = network.predict_generator(test_sequence, verbose=1)
    truth = test_sequence.get_y_true()

    log_path = os.path.join(out_dir, "test.log")
    print(f"** write log to {log_path} **")
    valid_scores = []
    with open(log_path, "w") as log:
        for col, label in enumerate(labels):
            try:
                score = roc_auc_score(truth[:, col], predicted[:, col])
            except ValueError:
                # logged as 0 and left out of the mean
                score = 0
            else:
                valid_scores.append(score)
            log.write(f"{label}: {score}\n")
        mean_auroc = np.mean(valid_scores)
        log.write("-------------------------\n")
        log.write(f"mean auroc: {mean_auroc}\n")
        print(f"mean auroc: {mean_auroc}")
Пример #13
0
def main():
    """Train a multi-label chest X-ray classifier and save its history.

    Reads ``--model_epoch`` (int, default 0) from the command line. The value
    names the weights file (``weights{N}.h5``) and the history pickle
    (``history{N}.pkl``) and is forwarded to the data sequences and the
    AUROC callback. Every other hyper-parameter is hard-coded below.

    Side effects: writes checkpointed weights, TensorBoard logs and a pickled
    training history under ``./experiments/ae``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_epoch', type=int, default=0)
    args = parser.parse_args()

    # Set Parameter #
    base_model_name = "DenseNet121"
    use_base_model_weights = True
    weights_path = None
    image_dimension = 224
    batch_size = 32
    epochs = 20
    class_names = ["Nodule", "Pneumothorax"]
    csv_path = './data/classification'
    image_source_dir = '/media/nfs/CXR/NIH/chest_xrays/NIH/data/images_1024x1024/'
    augmenter = None
    #  If train_steps is set to None, will calculate train steps by len(train)/batch_size
    train_steps = None
    positive_weights_multiply = 1
    outputs_path = './experiments/ae'
    weights_name = f'weights{args.model_epoch}.h5'
    output_weights_path = os.path.join(outputs_path, weights_name)
    initial_learning_rate = 0.0001
    training_stats = {}

    # Make sure the output directory exists before any callback or the final
    # history dump tries to write into it.
    os.makedirs(outputs_path, exist_ok=True)

    # Get Sample and Total Count From Training Data and Compute Class Weights #
    train_counts, train_pos_counts = get_sample_counts(csv_path, "train",
                                                       class_names)
    if train_steps is None:
        train_steps = int(train_counts / batch_size)
    # The "test" split doubles as the validation set in this script.
    dev_counts, _ = get_sample_counts(csv_path, "test", class_names)
    validation_steps = int(dev_counts / batch_size)
    print('***Compute Class Weights***')
    class_weights = get_class_weights(train_counts,
                                      train_pos_counts,
                                      multiply=positive_weights_multiply)
    print(class_weights)

    # Create Image Sequence #

    train_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(csv_path, "train.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=augmenter,
        steps=train_steps,
        model_epoch=args.model_epoch)

    validation_sequence = AugmentedImageSequence(
        dataset_csv_file=os.path.join(csv_path, "test.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=augmenter,
        steps=validation_steps,
        shuffle_on_epoch_end=False,
        model_epoch=args.model_epoch)

    # Build Model #
    factory = ModelFactory()
    model = factory.get_model(class_names,
                              model_name=base_model_name,
                              use_base_weights=use_base_model_weights,
                              weights_path=weights_path,
                              input_shape=(image_dimension, image_dimension,
                                           3))

    print("** check multiple gpu availability **")
    # The GPU count is derived from the number of comma-separated entries in
    # CUDA_VISIBLE_DEVICES; an unset variable is treated as a single device.
    gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
    if gpus > 1:
        print(f"** multi_gpu_model is used! gpus={gpus} **")
        model_train = multi_gpu_model(model, gpus)
        # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
        checkpoint = MultiGPUModelCheckpoint(
            filepath=output_weights_path,
            base_model=model,
        )
    else:
        model_train = model
        checkpoint = ModelCheckpoint(
            output_weights_path,
            save_weights_only=True,
            save_best_only=True,
            verbose=1,
        )

    auroc = MultipleClassAUROC(sequence=validation_sequence,
                               class_names=class_names,
                               weights_path=output_weights_path,
                               stats=training_stats,
                               workers=8,
                               model_epoch=args.model_epoch)
    callbacks = [
        checkpoint,
        TensorBoard(log_dir=os.path.join(outputs_path, "logs"),
                    batch_size=batch_size),
        ReduceLROnPlateau(monitor='val_loss',
                          factor=0.1,
                          patience=1,
                          verbose=1,
                          mode="min",
                          min_lr=1e-8),
        auroc,
    ]

    # Compile Model #
    print('*** Start Compiling ***')
    optimizer = Adam(lr=initial_learning_rate)
    model_train.compile(optimizer=optimizer, loss="binary_crossentropy")

    # Train #
    print("** start training **")
    history = model_train.fit_generator(
        generator=train_sequence,
        steps_per_epoch=train_steps,
        epochs=epochs,
        validation_data=validation_sequence,
        validation_steps=validation_steps,
        callbacks=callbacks,
        class_weight=class_weights,
        workers=8,
        shuffle=False,
    )
    # dump history
    print("** dump history **")
    history_path = os.path.join(outputs_path,
                                f"history{args.model_epoch}.pkl")
    with open(history_path, "wb") as f:
        pickle.dump({
            "history": history.history,
            "auroc": auroc.aurocs,
        }, f)
    print("** done! **")
def main():
    """Evaluate saved weights on the ``testt`` split and log per-class AUROC.

    All settings are hard-coded below. The function loads the model with
    either the best or the last weights, predicts on the sequence built from
    ``./outputs/testt.csv``, and reports one AUROC per class plus their mean.

    Side effects: saves ``y_hat_val.npy`` and ``y_val.npy`` in the current
    working directory and overwrites ``./outputs/val.log``.

    Raises:
        ValueError: if ``test_steps`` is neither ``"auto"`` nor an integer.
    """
    # default config
    output_dir = './outputs'
    base_model_name = 'InceptionResNetV2'
    class_names = [
        'Atelectasis',
        'Cardiomegaly',
        'Effusion',
        'Infiltration',
        'Mass',
        'Nodule',
        'Pneumonia',
        'Pneumothorax',
        'Consolidation',
        'Edema',
        'Emphysema',
        'Fibrosis',
        'Pleural_Thickening',
        'Hernia',
    ]
    image_source_dir = './Images'
    image_dimension = 341
    batch_size = 16
    test_steps = 1
    use_best_weights = True
    output_weights_name = 'weights.h5'
    # NOTE(review): the original literal for weights_path was truncated;
    # reconstructed as <output_dir>/<output_weights_name> - confirm.
    weights_path = os.path.join(output_dir, output_weights_name)
    best_weights_path = './outputs/best_auroc.h5'

    # get test sample count
    test_counts, _ = get_sample_counts(output_dir, "testt", class_names)

    # compute steps
    if test_steps == "auto":
        test_steps = int(test_counts / batch_size)
    else:
        try:
            test_steps = int(test_steps)
        except ValueError:
            raise ValueError(f"""
                test_steps: {test_steps} is invalid,
                please use 'auto' or integer.
                """)
    print(f"** test_steps: {test_steps} **")

    print("** load model **")
    if use_best_weights:
        print("** use best weights **")
        model_weights_path = best_weights_path
    else:
        print("** use last weights **")
        model_weights_path = weights_path
    model_factory = ModelFactory()
    model = model_factory.get_model(
        class_names,
        model_name=base_model_name,
        use_base_weights=False,
        weights_path=model_weights_path)

    print("** load test generator **")
    test_sequence = AugmentedImageSequence2(
        dataset_csv_file=os.path.join(output_dir, "testt.csv"),
        class_names=class_names,
        source_image_dir=image_source_dir,
        batch_size=batch_size,
        target_size=(image_dimension, image_dimension),
        augmenter=None,
        steps=test_steps,
        shuffle_on_epoch_end=False,
    )
    print("** make prediction **")
    y_hat = model.predict_generator(test_sequence, verbose=1)
    y = test_sequence.get_y_true()
    np.save('y_hat_val.npy', y_hat)
    np.save('y_val.npy', y)
    test_log_path = "./outputs/val.log"
    print(f"** write log to {test_log_path} **")
    aurocs = []
    with open(test_log_path, "w") as f:
        for i, class_name in enumerate(class_names):
            try:
                score = roc_auc_score(y[:, i], y_hat[:, i])
                aurocs.append(score)
            except ValueError:
                # The class is logged with 0 and left out of the mean when
                # roc_auc_score cannot score it.
                score = 0
            f.write(f"{class_name}: {score}\n")
        # np.mean([]) is NaN with a RuntimeWarning; report NaN without it.
        mean_auroc = np.mean(aurocs) if aurocs else float("nan")
        f.write("-------------------------\n")
        f.write(f"mean auroc: {mean_auroc}\n")
        print(f"mean auroc: {mean_auroc}")