Example No. 1
def load_and_get_model_for_inference(trained_model_arch,
                                     trained_checkpoint_dir, filetype,
                                     input_shape, num_classes):
    model_factory = ModelFactory()
    model = model_factory.get_model(
        trained_model_arch,
        input_shape,
        is_training=False,
        num_classes=num_classes,
        learning_rate=0.001)  # Dummy learning rate; it is unused in inference mode.
    # The ModelCheckpoint callback in the training pipeline saves the weights inside the checkpoint directory as follows.
    if filetype == '.h5':
        weights_path = os.path.join(trained_checkpoint_dir, "best_model_dir-auc.h5")
        model = tf.keras.models.load_model(weights_path)
    elif filetype == 'tf':
        weights_path = os.path.join(trained_checkpoint_dir, "variables",
                                    "variables")
        model.load_weights(weights_path)
    else:
        raise ValueError(
            "The provided saved model filetype is not recognized: %s" % filetype)

    print(
        "The model has been created and the weights have been loaded from: %s"
        % weights_path)
    model.summary()
    return model
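A minimal usage sketch for the function above; the architecture key, checkpoint path, and shapes are illustrative assumptions, not values from the original project.

# Hypothetical call for a TensorFlow SavedModel checkpoint directory.
inference_model = load_and_get_model_for_inference(
    trained_model_arch="vgg16",  # assumed architecture key known to ModelFactory
    trained_checkpoint_dir="out/best_model_dir-auc.ckpt",  # assumed SavedModel directory
    filetype="tf",
    input_shape=(224, 224, 3),
    num_classes=1)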
Example No. 2
    def __init__(self, api, model_factory=None):
        self.api = api
        self.model_factory = model_factory or ModelFactory()
        self.mockups = {}

        for resource_name, resource in self.api._registry.items():
            model_class = resource._meta.object_class
            self.register(resource)
            # Register the model class with the factory only if it is not already registered.
            try:
                self.model_factory[model_class]
            except KeyError:
                self.model_factory.register(model_class)
Example No. 3
    def __init__(self, config):
        self.manager = ModelManager(
            config.flavor,
            config.server,
            config.database,
            config.driver,
            config.port,
            config.schema
        )
        self.factory = ModelFactory(
            config.environment,
            config.flavor,
            FieldFactory()
        )
Example No. 4
def load_model():
    model_file_path = 'src/best_weights_1555982768.7076797.h5'
    #model_file_path = 'best_weights_1555982768.7076797.h5'
    model_factory = ModelFactory()
    model = model_factory.get_model(class_names,
                                    model_name=model_type,
                                    use_base_weights=False,
                                    weights_path=model_file_path,
                                    input_shape=(img_height, img_width, 3))
    optimizer = keras.optimizers.Adam(lr=1e-3, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=optimizer,
                  loss="binary_crossentropy",
                  metrics=["accuracy", "binary_accuracy"])
    model.load_weights(model_file_path)

    return model
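A hypothetical smoke test for the loader above; it assumes the same module-level globals the snippet relies on (class_names, img_height, img_width) and feeds a zero-filled placeholder batch instead of a real image.

import numpy as np

model = load_model()
dummy_batch = np.zeros((1, img_height, img_width, 3), dtype=np.float32)  # placeholder input batch
probabilities = model.predict(dummy_batch)[0]
print(dict(zip(class_names, probabilities)))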
Example No. 5
#!/usr/bin/env python
# coding: utf-8

# In[1]:

import cv2

# In[2]:

from models import ModelType, ModelFactory

# In[3]:

model = ModelFactory(rgbpath='trained_models/rgblstm.h5',
                     trained=True).getModel(ModelType.RGB)

# In[4]:

model.summary()

# In[5]:

from keras.layers import Input
from keras.models import Model

# In[6]:

rgbinput = Input((150, 100, 3))

x = model.layers[1].layer(rgbinput)
for layer in model.layers[2:-3]:
Example No. 6
def main():

    print("\n###############################################################")
    print("##########################DATA PREPARATION#####################")
    print("###############################################################\n")
    ROOT_DIR = os.getcwd()
    print(ROOT_DIR)
    INPUT_DIR = os.path.join(ROOT_DIR, config.INPUT_FOLDER)
    print(INPUT_DIR)
    PATIENTS_INFO = os.path.join(INPUT_DIR, config.INFO_PATIENTS)
    print(PATIENTS_INFO)

    IMAGES_REGEX = os.path.join(INPUT_DIR, config.IMAGES_ACESS)
    images_paths = config_func.getImages(IMAGES_REGEX)
    print(images_paths[:5])

    data = pd.read_csv(PATIENTS_INFO)
    print(data.iloc[0])
    data = data.sort_values(config.IMAGE_ID, ascending=True)
    print(data.head(5))

    # ADD A NEW COLUMN (IMAGE PATH) AND POPULATE IT WITH THE CORRECT PATH FOR EACH IMAGE
    data = config_func.addNewColumn_Populate_DataFrame(data, config.PATH, images_paths)
    data = data.sort_index()
    print(data.head(5))
    print(data.iloc[0][config.PATH])

    # IMPUTE NULL VALUES
    data = config_func.impute_null_values(data, config.AGE, mean=True)
    print(data.isnull().sum())
    print(data.head(5))
    data.dx = data.dx.astype('category')
    print(data.info())

    #GET IMAGE DATASET WITH SPECIFIC SIZE
    X, Y = config_func.getDataFromImages(dataframe=data, size=config.WANTED_IMAGES)
    print(X.shape)
    print(Y.shape)
    #number_by_perc = [sum(Y == i) for i in range(len(data.dx.unique()))]

    # STRATIFIED SPLIT INTO X_TRAIN, X_VAL AND X_TEST
    indexes = np.arange(X.shape[0])
    X_train, X_val, y_train, y_val, indices_train, indices_val = train_test_split(
        X, Y, indexes, test_size=config.VALIDATION_SPLIT, shuffle=True,
        random_state=config.RANDOM_STATE, stratify=Y)
    indexes = indices_train
    X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split(
        X_train, y_train, indexes, test_size=config.TEST_SPLIT, shuffle=True,
        random_state=config.RANDOM_STATE, stratify=y_train)

    print(X_train.shape)
    print(y_train.shape)
    print(X_val.shape)
    print(y_val.shape)
    print(X_test.shape)
    print(y_test.shape)


    if config.FLAG_SEGMENT_IMAGES == 1:
        ## ---------------------------U-NET APPLICATION ------------------------------------
        dataset = Data.Data(X_train=X_train, X_val=X_val, X_test=X_test,
                         y_train=y_train, y_val=y_val, y_test=y_test)
        unet_args = (0, 0)  # the U-Net model ignores these args, so any tuple is valid here

        fact = ModelFactory.ModelFactory()
        unet = fact.getModel(config.U_NET, dataset, *unet_args)  # args are ignored by U-Net

        ## cache the predictions array: load it from file if present, otherwise compute and save it
        PREDICTIONS_TEMP_FILE_PATH = os.path.join(INPUT_DIR, config.TEMP_ARRAYS)
        if os.path.exists(PREDICTIONS_TEMP_FILE_PATH):
            with open(PREDICTIONS_TEMP_FILE_PATH, 'rb') as f:
                predictions = np.load(f)
        else:  ## the cache file does not exist yet
            with open(PREDICTIONS_TEMP_FILE_PATH, 'wb') as f:
                model, predictions, history = unet.template_method()
                predictions = np.array(predictions)  ## convert list to numpy array
                np.save(f, predictions)

        ## create folder if not exists
        masks_path_folder = os.path.join(INPUT_DIR, config.MASKS_FOLDER)
        if not os.path.exists(masks_path_folder):
            os.makedirs(masks_path_folder)
        if not os.listdir(masks_path_folder): ## if folder is empty (no images inside)
            ## insert mask images in mask folder
            for i in range(predictions.shape[0]):
                cv2.imwrite(os.path.join(masks_path_folder, data.at[indices_train[i], config.IMAGE_ID]+'.jpg'), predictions[i])

        # plt.figure(figsize=(16, 16))
        # plt.imshow(cv2.cvtColor(self.data.X_train[2], cv2.COLOR_BGR2RGB))
        # plt.title('Original Image')
        # plt.show()
        # plt.imshow(mask, plt.cm.binary_r)
        # plt.title('Binary Mask')
        # plt.show()
        # plt.imshow(cv2.cvtColor(concatenated_mask, cv2.COLOR_BGR2RGB))
        # plt.title('Segmented Image')
        # plt.show()

    # NORMALIZE DATA
    X_train, X_val, X_test = config_func.normalize(X_train, X_val, X_test)

    # ONE HOT ENCODING TARGETS
    y_train, y_val, y_test = config_func.one_hot_encoding(y_train, y_val, y_test)

    print("\n###############################################################")
    print("##########################CLASSIFICATION#######################")
    print("###############################################################\n")

    # CREATION OF DATA OBJECT
    data_obj = Data.Data(X_train=X_train, X_val=X_val, X_test=X_test,
                         y_train=y_train, y_val=y_val, y_test=y_test)

    ## INSTANCE OF MODEL FACTORY
    model_fact = ModelFactory.ModelFactory()

    ## STRATEGIES OF TRAIN INSTANCES
    undersampling = UnderSampling.UnderSampling()
    oversampling = OverSampling.OverSampling()
    data_augment = DataAugmentation.DataAugmentation()

    ## ---------------------------ALEXNET APPLICATION ------------------------------------

    ## DEFINITION OF NUMBER OF CNN AND DENSE LAYERS
    args = (6,1)

    # CREATE MODEL
    alexNet = model_fact.getModel(config.ALEX_NET, data_obj, *args)

    # APPLY STRATEGIES OF TRAIN
    #alexNet.addStrategy(undersampling)
    alexNet.addStrategy(oversampling)
    alexNet.addStrategy(data_augment)

    # VALUES TO POPULATE ON CONV AND DENSE LAYERS
    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)
    alex_args = (
        3,   # number of normal convolutional layers (+ init conv)
        1,   # number of stacked cnn layers
        73,  # number of feature maps of the initial conv layer
        23,  # growth rate
        1,   # number of FCL layers
        65,  # number of neurons in the Fully Connected Layer
        12   # batch size
    )

    # APPLY BUILD, TRAIN AND PREDICT
    #model, predictions, history = alexNet.template_method(*alex_args)
    #alexNet.save(model, config.ALEX_NET_WEIGHTS_FILE)

    ## PLOT FINAL RESULTS
    #config_func.print_final_results(data_obj.y_test, predictions, history, dict=False)

    ## ---------------------------VGGNET APPLICATION ------------------------------------

    ## DEFINITION OF NUMBER OF CNN AND DENSE LAYERS
    vggLayers = (5, 1)

    ## GET VGGNET MODEL
    vggnet = model_fact.getModel(config.VGG_NET, data_obj, *vggLayers)

    ## ASSIGNMENT OF TRAIN STRATEGIES
    vggnet.addStrategy(oversampling)
    vggnet.addStrategy(data_augment)

    # VALUES TO POPULATE ON CONV AND DENSE LAYERS
    vgg_args = (
        4,   # number of stacked cnn layers (+ init stack)
        71,  # number of feature maps of the initial conv layer
        18,  # growth rate
        1,   # number of FCL layers
        61,  # number of neurons in the Fully Connected Layer
        12   # batch size
    )

    # APPLY BUILD, TRAIN AND PREDICT
    #model, predictions, history = vggnet.template_method(*vgg_args)
    #vggnet.save(model, config.VGG_NET_WEIGHTS_FILE)

    ## PLOT FINAL RESULTS
    #config_func.print_final_results(data_obj.y_test, predictions, history, dict=False)

    ## ---------------------------RESNET APPLICATION ------------------------------------

    # number of conv and dense layers respectively
    number_cnn_dense = (5, 1)

    # creation of ResNet instance
    resnet = model_fact.getModel(config.RES_NET, data_obj, *number_cnn_dense)

    # apply strategies to resnet
    resnet.addStrategy(oversampling)
    resnet.addStrategy(data_augment)

    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)
    resnet_args = (
        56,  # number of filters of initial CNN layer
        4,  # number of consecutive conv+identity blocks
        2,  # number of identity blocks in each (conv+identity) block
        42,  # growth rate
        12,  # batch size
    )

    # APPLY BUILD, TRAIN AND PREDICT
    #model, predictions, history = resnet.template_method(*resnet_args)
    #resnet.save(model, config.RES_NET_WEIGHTS_FILE)

    ## PLOT FINAL RESULTS
    #config_func.print_final_results(data_obj.y_test, predictions, history, dict=False)

    ## ---------------------------DENSENET APPLICATION ------------------------------------

    # TUPLE DEFINITIONS
    numberLayers = (
        4,  # BLOCKS
        1  # DENSE LAYERS
    )

    valuesLayers = (
        59,  # initial number of Feature Maps
        4,  # number of dense blocks
        5,  # number of layers in each block
        11,  # growth rate
        1.0,  # compression rate
        21  # batch size
    )

    densenet = model_fact.getModel(config.DENSE_NET, data_obj, *numberLayers)

    densenet.addStrategy(oversampling)
    densenet.addStrategy(data_augment)


    #model, predictions, history = densenet.template_method(*valuesLayers)
    #densenet.save(model, config.DENSE_NET_WEIGHTS_FILE)

    #config_func.print_final_results(data_obj.y_test, predictions, history)

    ## --------------------------- ENSEMBLE OF MODELS ------------------------------------

    # load the previously trained models from their weight files
    alexNet2 = load_model(config.ALEX_NET_WEIGHTS_FILE)
    vggnet2 = load_model(config.VGG_NET_WEIGHTS_FILE)
    #vggnet2.name = 'model_2'
    #vggnet.save(vggnet2, config.VGG_NET_WEIGHTS_FILE)
    resnet2 = load_model(config.RES_NET_WEIGHTS_FILE)
    #resnet2.name = 'model_3'
    #resnet.save(resnet2, config.RES_NET_WEIGHTS_FILE)
    densenet2 = load_model(config.DENSE_NET_WEIGHTS_FILE)
    #densenet2.name = 'model_4'
    #densenet.save(densenet2, config.DENSE_NET_WEIGHTS_FILE)

    models = [alexNet2, vggnet2, resnet2, densenet2]

    ## call the ensemble helper
    ensemble_model = config_func.ensemble(models=models)
    predictions = ensemble_model.predict(data_obj.X_test)
    argmax_preds = np.argmax(predictions, axis=1)  # argmax over classes, one prediction per sample
    argmax_preds = keras.utils.to_categorical(argmax_preds)

    ## print final results
    config_func.print_final_results(data_obj.y_test, argmax_preds, history=None, dict=True)

    # save ensemble model
    ensemble_model.save(config.ENSEMBLE_ALL)
    del ensemble_model
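config_func.ensemble is not shown in this example. A minimal averaging-ensemble sketch in Keras, assuming every loaded model accepts the same input shape and the models have unique names, could look like the following; it is an illustration, not the project's actual helper.

from tensorflow import keras

def average_ensemble(models):
    # Feed one shared input tensor to every model and average their class probabilities.
    inputs = keras.Input(shape=models[0].input_shape[1:])
    outputs = [m(inputs) for m in models]
    averaged = keras.layers.Average()(outputs)
    return keras.Model(inputs=inputs, outputs=averaged, name="ensemble")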
Example No. 7
def train(train_metadata_file_path,
          val_metadata_file_path,
          images_dir_path,
          out_dir,
          model_arch,
          num_classes,
          label_name=None,
          sequence_image_count=1,
          data_pipeline_mode="mode_flat_all",
          class_weight=None,
          whole_epochs=100,
          batch_size=32,
          learning_rate=0.001,
          patience=2,
          min_delta_auc=0.01,
          input_size=(224, 224, 3)):
    """
    Train a model of the given architecture (e.g., VGG16) based on single images.

    :param train_metadata_file_path: The path to the metadata '.csv' file containing training image names.
    :param val_metadata_file_path: The path to the metadata '.csv' file containing validation image names.
    :param images_dir_path: The path containing the images.
    :param out_dir: The path to which the saved models need to be written.
    :param model_arch: The model architecture provided as string, which are present in the 'models' module.
    :param num_classes: The number of classes present in the data. If num_classes=1, it requires the 'label_name'.
    :param label_name: Required if num_classes=1. The name of the label to pick from the data.
    :param sequence_image_count: The number of images in the sequence dataset. Default: 1.
    :param data_pipeline_mode: The mode of the data pipeline. Default: "mode_flat_all".
    :param class_weight: The class_weights for imbalanced data. Example: {0: 1.0, 1: 0.5} if class "0" appears half as
        often as class "1" in your data. Default: None.
    :param whole_epochs: The maximum number of epochs to be trained. Note that the model may be early-stopped. Default: 100.
    :param batch_size: The batch size used for the data. Ensure that it fits within the GPU memory. Default: 32.
    :param learning_rate: The constant learning rate to be used for the Adam optimizer. Default: 0.001.
    :param patience: The number of epochs (full train dataset) to wait before early stopping. Default: 2.
    :param min_delta_auc: The minimum delta of validation auc for early stopping after patience. Default: 0.01.
    :param input_size: The shape of the tensors returned by the data pipeline mode. Default: (224, 224, 3).

    """
    if num_classes == 1 and label_name is None:
        raise ValueError(
            "Since num_classes equals 1, the label_name must be provided.")

    train_data_epoch_subdivisions = 4
    early_stop_monitor = "val_auc"
    early_stop_min_delta = min_delta_auc
    early_stop_patience = patience * train_data_epoch_subdivisions  # Convert patience from whole-dataset passes to sub-epochs.
    prefetch_buffer_size = 3  # Can also be set to tf.data.experimental.AUTOTUNE.

    os.makedirs(out_dir)

    # Build model architecture.
    model_factory = ModelFactory()
    model = model_factory.get_model(model_arch,
                                    input_size,
                                    is_training=True,
                                    num_classes=num_classes,
                                    learning_rate=learning_rate)
    print("Created the model architecture: %s" % model.name)
    model.summary()

    # Prepare the training dataset.
    print("Preparing training and validation datasets.")
    train_data_pipeline = PipelineGenerator(
        train_metadata_file_path,
        images_dir_path,  # XXX: This call requires the path to end with a slash.
        # This needs to be handled in the PipelineGenerator.
        is_training=True,
        sequence_image_count=sequence_image_count,
        label_name=label_name,
        mode=data_pipeline_mode)
    train_dataset = train_data_pipeline.get_pipeline()
    train_dataset = train_dataset.batch(batch_size).prefetch(
        prefetch_buffer_size)

    # Prepare the validation dataset
    val_data_pipeline = PipelineGenerator(
        val_metadata_file_path,
        images_dir_path,
        is_training=False,
        sequence_image_count=sequence_image_count,
        label_name=label_name,
        mode=data_pipeline_mode)
    val_dataset = val_data_pipeline.get_pipeline()
    val_dataset = val_dataset.batch(batch_size).prefetch(prefetch_buffer_size)

    # TODO: Find a way to log the activation maps, either during training, or after the training has completed.

    # Prepare the callbacks.
    print("Preparing Tensorflow Keras Callbacks.")
    earlystop_callback = keras.callbacks.EarlyStopping(
        monitor=early_stop_monitor,
        min_delta=early_stop_min_delta,
        patience=early_stop_patience)

    # XXX: We use the HDF5 method to store the sequence models due to a bug in tensorflow TimeDistributed wrapper
    if data_pipeline_mode in PipelineGenerator.TIMESTEP_MODES:
        model_extension = ".h5"
    else:
        model_extension = ".ckpt"

    best_model_checkpoint_auc_callback = keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(out_dir, "best_model_dir-auc" + model_extension),
        mode='max',
        monitor='val_auc',
        save_best_only=True,
        save_weights_only=False,
        verbose=1)
    best_model_checkpoint_loss_callback = keras.callbacks.ModelCheckpoint(
        filepath=os.path.join(out_dir,
                              "best_model_dir-loss" + model_extension),
        mode='min',
        monitor='val_loss',
        save_best_only=True,
        save_weights_only=False,
        verbose=1)

    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=os.path.join(
        out_dir, "TBGraph"),
                                                       write_graph=True,
                                                       write_images=True)

    callbacks = [
        earlystop_callback, best_model_checkpoint_auc_callback,
        best_model_checkpoint_loss_callback, tensorboard_callback
    ]

    # Start model training.
    # Defining an 'epoch' to be a quarter of the train dataset.
    num_train_samples = train_data_pipeline.get_size()
    num_val_samples = val_data_pipeline.get_size()
    # Number of batches per one run through the train dataset.
    num_training_steps_per_whole_dataset = int(num_train_samples / batch_size)
    num_val_steps_per_whole_dataset = int(num_val_samples / batch_size)
    steps_per_epoch = int(num_training_steps_per_whole_dataset /
                          train_data_epoch_subdivisions)
    max_num_epochs = int(whole_epochs * train_data_epoch_subdivisions)
    max_train_steps = int(max_num_epochs * steps_per_epoch)

    print(
        "Number of train samples: %s, which corresponds to ~%s batches for one complete run through the "
        "train dataset. Number of validation samples: %s, which corresponds to ~%s batches for one complete iteration. "
        "Considering a 1/%s fraction of the train dataset as an epoch (steps_per_epoch: %s) "
        "after which validation and model checkpoints are saved. Running training for a maximum of %s steps, "
        "which corresponds to max_num_epochs: %s (whole_epochs: %s). "
        "Early stopping has been set based on '%s' with a min_delta of %s and a patience of %s."
        % (num_train_samples, num_training_steps_per_whole_dataset,
           num_val_samples, num_val_steps_per_whole_dataset,
           train_data_epoch_subdivisions, steps_per_epoch, max_train_steps,
           max_num_epochs, whole_epochs, early_stop_monitor,
           early_stop_min_delta, early_stop_patience))

    print("\nStarting the model training.")
    start_time = time.time()

    model.fit(train_dataset,
              epochs=max_num_epochs,
              steps_per_epoch=steps_per_epoch,
              validation_data=val_dataset,
              validation_steps=num_val_steps_per_whole_dataset,
              callbacks=callbacks,
              class_weight=class_weight)

    time_taken = time.time() - start_time
    print(
        "Training completed and the output has been saved in %s. Time taken: %s seconds."
        % (out_dir, time_taken))
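A hypothetical invocation of train(); the file paths, architecture key, and label name below are placeholders for whatever the surrounding project defines.

train(train_metadata_file_path="data/train_metadata.csv",  # assumed CSV location
      val_metadata_file_path="data/val_metadata.csv",
      images_dir_path="data/images/",  # trailing slash required (see the XXX note above)
      out_dir="out/vgg16_run_01",
      model_arch="vgg16",  # assumed architecture key known to ModelFactory
      num_classes=1,
      label_name="label",  # required because num_classes == 1
      batch_size=32,
      learning_rate=0.001)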
Example No. 8
def main():

    print("\n###############################################################")
    print("##########################DATA PREPARATION#####################")
    print("###############################################################\n")

    # access image data
    PROJECT_DIR = os.getcwd()
    INPUT_DIR = os.path.join(PROJECT_DIR,
                             config.INPUT_DIR)  # path of input directory
    IMAGES_DIR = os.path.join(INPUT_DIR, config.IMAGES_ACESS)

    # define paths for all classes (stroma, tumor, mucosa, empty, lympho, adipose, complex, debris)
    STROMA_FOLDER = os.path.join(IMAGES_DIR, config.STROMA_DIR,
                                 config.IMAGES_REGEX)
    TUMOR_FOLDER = os.path.join(IMAGES_DIR, config.TUMOR_DIR,
                                config.IMAGES_REGEX)
    MUCOSA_FOLDER = os.path.join(IMAGES_DIR, config.MUCOSA_DIR,
                                 config.IMAGES_REGEX)
    EMPTY_FOLDER = os.path.join(IMAGES_DIR, config.EMPTY_DIR,
                                config.IMAGES_REGEX)
    LYMPHO_FOLDER = os.path.join(IMAGES_DIR, config.LYMPHO_DIR,
                                 config.IMAGES_REGEX)
    ADIPOSE_FOLDER = os.path.join(IMAGES_DIR, config.ADIPOSE_DIR,
                                  config.IMAGES_REGEX)
    COMPLEX_FOLDER = os.path.join(IMAGES_DIR, config.COMPLEX_DIR,
                                  config.IMAGES_REGEX)
    DEBRIS_FOLDER = os.path.join(IMAGES_DIR, config.DEBRIS_DIR,
                                 config.IMAGES_REGEX)
    LIST_CLASSES_FOLDER = [
        STROMA_FOLDER, TUMOR_FOLDER, MUCOSA_FOLDER, EMPTY_FOLDER,
        LYMPHO_FOLDER, ADIPOSE_FOLDER, COMPLEX_FOLDER, DEBRIS_FOLDER
    ]

    # get images from all folders
    # classes targets --> 0: Stroma, 1: Tumor, 2: Mucosa, 3: Empty, 4: Lympho, 5: Adipose, 6: Complex, 7: Debris
    images = []
    labels = []
    for i, j in zip(LIST_CLASSES_FOLDER, range(config.NUMBER_CLASSES)):
        images.append(config_func.getImages(i))
        labels.extend([j] * len(images[j]))

    # flatten images list
    images = [path for sublist in images for path in sublist]

    # construct DataFrame with two columns: (image_path, target)
    data = pd.DataFrame(list(zip(images, labels)),
                        columns=[config.IMAGE_PATH, config.TARGET])

    # subsample the data; a rate of 1 keeps the full dataset
    if config.SUBSAMPLE_PERCENTAGE != 1:
        data = config_func.get_subsample_of_data(config.SUBSAMPLE_PERCENTAGE, data)
        print(data.head(5))
        print(data.shape)
        print(data[config.TARGET].value_counts())

    # get pixel data from the images and their respective targets
    X, Y = config_func.resize_images(config.WIDTH, config.HEIGHT, data)
    print(X.shape)
    print(Y.shape)

    # STRATIFIED SPLIT INTO X_TRAIN, X_VAL AND X_TEST
    X_train, X_val, y_train, y_val = train_test_split(
        X,
        Y,
        test_size=config.VALIDATION_SPLIT,
        shuffle=True,
        random_state=config.RANDOM_STATE,
        stratify=Y)

    X_train, X_test, y_train, y_test = train_test_split(
        X_train,
        y_train,
        test_size=config.TEST_SPLIT,
        shuffle=True,
        random_state=config.RANDOM_STATE,
        stratify=y_train)

    # normalization of data
    X_train, X_val, X_test = config_func.normalize(X_train, X_val, X_test)

    # one-hot encoding targets
    y_train, y_val, y_test = config_func.one_hot_encoding(y_train=y_train,
                                                          y_val=y_val,
                                                          y_test=y_test)

    print("\n###############################################################")
    print("##########################CLASSIFICATION#######################")
    print("###############################################################\n")

    # creation of Data instance
    data_obj = Data.Data(X_train=X_train,
                         X_val=X_val,
                         X_test=X_test,
                         y_train=y_train,
                         y_val=y_val,
                         y_test=y_test)

    # creation of the model factory instance
    model_factory = ModelFactory.ModelFactory()

    # creation of the optimizer factory instance
    optimization_factory = OptimizerFactory.OptimizerFactory()

    # definition of train strategies instances
    data_augment = DataAugmentation.DataAugmentation()

    ## ---------------------------ALEXNET APPLICATION ------------------------------------

    # number of conv and dense layers respectively
    alex_number_layers = (5, 1)

    # creation of AlexNet instance
    alexNet = model_factory.getModel(config.ALEX_NET, data_obj,
                                     *alex_number_layers)

    # apply strategies to alexNet
    alexNet.addStrategy(data_augment)

    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)
    alex_args = (
        2,   # number of normal convolutional layers (+ init conv)
        2,   # number of stacked cnn layers
        70,  # number of feature maps of the initial conv layer
        19,  # growth rate
        1,   # number of FCL layers
        43,  # number of neurons in the Fully Connected Layer
        9    # batch size
    )

    # apply build, train and predict
    #model, predictions, history = alexNet.template_method(*alex_args)
    ##alexNet.save(model, config.ALEX_NET_WEIGHTS_FILE)

    # print final results
    #config_func.print_final_results(y_test=data_obj.y_test, predictions=predictions, history=history, dict=False)

    ## ---------------------------VGGNET APPLICATION ------------------------------------

    # number of conv and dense layers respectively
    vgg_number_layers = (4, 1)

    # creation of VGGNet instance
    vggnet = model_factory.getModel(config.VGG_NET, data_obj,
                                    *vgg_number_layers)

    # apply strategies to vggnet
    vggnet.addStrategy(data_augment)

    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)

    vgg_args = (
        4,   # number of stacked cnn layers (+ init stack)
        64,  # number of feature maps of the initial conv layer
        12,  # growth rate
        1,   # number of FCL layers
        16,  # number of neurons in the Fully Connected Layer
        config.BATCH_SIZE_ALEX_AUG  # batch size
    )

    # apply build, train and predict
    #model, predictions, history = vggnet.template_method(*vgg_args)
    ##vggnet.save(model, config.VGG_NET_WEIGHTS_FILE)

    # print final results
    #config_func.print_final_results(y_test=data_obj.y_test, predictions=predictions, history=history, dict=False)

    ## ---------------------------RESNET APPLICATION ------------------------------------

    # number of conv and dense layers respectively
    number_cnn_dense = (5, 1)

    # creation of ResNet instance
    resnet = model_factory.getModel(config.RES_NET, data_obj,
                                    *number_cnn_dense)

    # apply strategies to resnet
    resnet.addStrategy(data_augment)

    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)
    resnet_args = (
        48,  # number of filters of initial CNN layer
        4,  # number of consecutive conv+identity blocks
        0,  # number of identity-block repetitions per stage; the resnet-18 default is 1 (1 conv block + 1 identity block)
        8,  # growth rate
        config.BATCH_SIZE_ALEX_AUG,  # batch size
    )

    # apply build, train and predict
    #model, predictions, history = resnet.template_method(*resnet_args)
    ##resnet.save(model, config.RES_NET_WEIGHTS_FILE)

    # print final results
    #config_func.print_final_results(y_test=data_obj.y_test, predictions=predictions, history=history, dict=False)

    ## ---------------------------DENSENET APPLICATION ------------------------------------

    # TUPLE DEFINITIONS
    numberLayers = (
        4,  #BLOCKS
        1  #DENSE LAYERS
    )

    valuesLayers = (
        24,  # initial number of Feature Maps
        4,  # number of dense blocks
        5,  # number of layers in each block
        12,  # growth rate
        0.5,  # compression rate
        config.BATCH_SIZE_ALEX_AUG  # batch size
    )

    densenet = model_factory.getModel(config.DENSE_NET, data_obj,
                                      *numberLayers)

    densenet.addStrategy(data_augment)

    model, predictions, history = densenet.template_method(*valuesLayers)

    config_func.print_final_results(data_obj.y_test, predictions, history)
Example No. 9
    def __init__(self,
                 args: Args,
                 cfg: ConfigTree,
                 local_rank: int,
                 final_validate=False):
        self.args = args
        self.cfg = cfg
        self.local_rank = local_rank

        self.model_factory = ModelFactory(cfg)
        self.data_loader_factory = DataLoaderFactoryV3(cfg, final_validate)
        self.final_validate = final_validate

        self.device = torch.device(
            f'cuda:{local_rank}' if torch.cuda.is_available() else 'cpu')

        model_type = cfg.get_string('model_type')
        if model_type == '1stream':
            self.model = self.model_factory.build(local_rank)  # basic model
        elif model_type == 'multitask':
            self.model = self.model_factory.build_multitask_wrapper(local_rank)
        else:
            raise ValueError(f'Unrecognized model_type "{model_type}"')
        if not final_validate:
            self.train_loader = self.data_loader_factory.build(
                vid=False,  # need label to gpu
                split='train',
                device=self.device)
        self.validate_loader = self.data_loader_factory.build(
            vid=False, split='val', device=self.device)

        if final_validate:
            self.n_crop = cfg.get_int(
                'temporal_transforms.validate.final_n_crop')
        else:
            self.n_crop = cfg.get_int('temporal_transforms.validate.n_crop')

        self.criterion = nn.CrossEntropyLoss()

        self.learning_rate = self.cfg.get_float('optimizer.lr')
        optimizer_type = self.cfg.get_string('optimizer.type', default='sgd')
        if optimizer_type == 'sgd':
            self.optimizer = torch.optim.SGD(
                self.model.parameters(),
                lr=self.learning_rate,
                momentum=self.cfg.get_float('optimizer.momentum'),
                dampening=self.cfg.get_float('optimizer.dampening'),
                weight_decay=self.cfg.get_float('optimizer.weight_decay'),
                nesterov=self.cfg.get_bool('optimizer.nesterov'),
            )
        elif optimizer_type == 'adam':
            self.optimizer = torch.optim.Adam(
                self.model.parameters(),
                lr=self.learning_rate,
                eps=self.cfg.get_float('optimizer.eps'),
            )
        else:
            raise ValueError(f'Unknown optimizer "{optimizer_type}"')

        self.num_epochs = cfg.get_int('num_epochs')
        self.schedule_type = self.cfg.get_string('optimizer.schedule')
        if self.schedule_type == "plateau":
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer=self.optimizer,
                mode='min',
                patience=self.cfg.get_int('optimizer.patience'),
                verbose=True)
        elif self.schedule_type == "multi_step":
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer=self.optimizer,
                milestones=self.cfg.get("optimizer.milestones"),
            )
        elif self.schedule_type == "cosine":
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer=self.optimizer,
                T_max=self.num_epochs,
                eta_min=self.learning_rate / 1000)
        elif self.schedule_type == 'none':
            self.scheduler = torch.optim.lr_scheduler.LambdaLR(
                optimizer=self.optimizer,
                lr_lambda=lambda epoch: 1,
            )
        else:
            raise ValueError("Unknow schedule type")

        self.arch = cfg.get_string('model.arch')

        if local_rank == 0:
            self.summary_writer = SummaryWriter(
                log_dir=str(args.experiment_dir))
        else:
            self.summary_writer = None

        self.best_acc1 = 0.
        self.current_epoch = 0
        self.next_epoch = None
        logger.info('Engine: n_crop=%d', self.n_crop)

        self.checkpoint_manager = CheckpointManager(self.args.experiment_dir,
                                                    keep_interval=None)
        self.loss_meter = None
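The ConfigTree type suggests a pyhocon configuration. As a sketch, and assuming pyhocon, a config containing the keys this constructor itself reads might look like the following (values are placeholders; the factories and other methods will need additional keys).

from pyhocon import ConfigFactory

cfg = ConfigFactory.parse_string("""
    model_type = 1stream
    num_epochs = 50
    model.arch = resnet18
    optimizer {
        type = sgd
        lr = 0.01
        momentum = 0.9
        dampening = 0
        weight_decay = 1e-4
        nesterov = true
        schedule = cosine
    }
    temporal_transforms.validate {
        n_crop = 1
        final_n_crop = 10
    }
""")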
Example No. 10
    def __init__(self, image_shape, io):
        self.mf = ModelFactory(image_shape)
        self.io = io
Example No. 11
def main():
    '''
        DATA PREPARATION (PRE-PROCESSING, CLEAN, TRANSFORM)
    '''
    print("#################", "DATA PREPARATION", "####################\n")
    # CREATION OF DATAFRAME WITH ALL IMAGES --> [ID_PATIENT, PATH_IMAGE, TARGET]
    data = pd.DataFrame(index=np.arange(0, config.SIZE_DATAFRAME),
                        columns=[config.ID, config.IMAGE_PATH, config.TARGET])

    # POPULATE DATAFRAME
    data = config_func.populate_DataFrame(data)

    # TRANSFORM DATA INTO NUMPY ARRAYS
    X, Y = config_func.resize_images(config.WIDTH, config.HEIGHT, data)

    # DIVISION OF THE DATASET INTO TRAIN, VALIDATION AND TEST --> THIS NEEDS ATTENTION, BECAUSE CLASSES ARE UNBALANCED
    indexes = np.arange(X.shape[0])
    X_train, X_val, y_train, y_val, indices_train, indices_val = train_test_split(
        X,
        Y,
        indexes,
        test_size=config.VALIDATION_SIZE,
        stratify=Y,
        shuffle=True,
        random_state=config.RANDOM_STATE
    )  # RANDOM STATE IS NEEDED TO GUARANTEE REPRODUCIBILITY
    indexes = indices_train
    X_train, X_test, y_train, y_test, indices_train, indices_test = train_test_split(
        X_train,
        y_train,
        indexes,
        test_size=config.TEST_SIZE,
        stratify=y_train,
        shuffle=True,
        random_state=config.RANDOM_STATE)
    print(X_train.shape)
    print(X_val.shape)
    print(X_test.shape)

    #NORMALIZE DATA
    X_train, X_val, X_test = config_func.normalize(X_train, X_val, X_test)

    #ONE HOT ENCODING TARGETS
    y_train, y_val, y_test = config_func.one_hot_encoding(
        y_train, y_val, y_test)
    print("#################", "DATA PREPARATION CONCLUDED",
          "####################\n")

    #CREATE OBJECT DATA
    d = Data.Data(X_train=X_train,
                  X_val=X_val,
                  X_test=X_test,
                  y_train=y_train,
                  y_val=y_val,
                  y_test=y_test)

    factoryModel = ModelFactory.ModelFactory()
    numberLayers = (
        4,  #CNN LAYERS
        1  #DENSE LAYERS
    )

    ## STRATEGIES OF TRAIN INSTANCES

    underSampling = UnderSampling.UnderSampling()
    data_aug = DataAugmentation.DataAugmentation()

    ## ---------------------------ALEXNET APPLICATION ------------------------------------

    ## TUPLE DEFINITIONS
    numberLayers = (
        4,  #CNN LAYERS
        1  #DENSE LAYERS
    )

    valuesLayers = (
        2,  ## number of normal convolutional layers
        2,  ## number of stacked cnn layers
        16,  ## number of feature maps of first conv layer
        16,  ## growth rate
        2,  ## number of FCLs preceding the output layer (sigmoid layer)
        16,  ## number of neurons in the Fully Connected Layer
        config.BATCH_SIZE_ALEX_AUG  # batch size
    )

    # CREATION OF MODEL
    alexNetModel = factoryModel.getModel(config.ALEX_NET, d, *numberLayers)

    ## APPLY STRATEGIES OF TRAIN
    alexNetModel.addStrategy(underSampling)
    alexNetModel.addStrategy(data_aug)

    #model, predictions, history = alexNetModel.template_method(*valuesLayers)

    #config_func.print_final_results(d.y_test, predictions, history)

    ## ---------------------------VGGNET APPLICATION ------------------------------------

    ## TUPLE DEFINITIONS
    numberLayers = (
        4,  #CNN LAYERS
        1  #DENSE LAYERS
    )

    valuesLayers = (
        5,  # conv stacks
        24,  # number of feature maps of initial convolution layer
        16,  # growth rate
        1,  ## number of FCLs preceding the output layer (sigmoid layer)
        16,  # number of neurons in the Fully Connected Layer
        config.BATCH_SIZE_ALEX_AUG  # batch size
    )

    vggNetModel = factoryModel.getModel(config.VGG_NET, d, *numberLayers)

    vggNetModel.addStrategy(underSampling)
    vggNetModel.addStrategy(data_aug)

    #model, predictions, history = vggNetModel.template_method(*valuesLayers)

    #config_func.print_final_results(d.y_test, predictions, history)

    ## ---------------------------RESNET APPLICATION ------------------------------------

    # number of conv and dense layers respectively
    number_cnn_dense = (5, 1)

    # creation of ResNet instance
    resnet = factoryModel.getModel(config.RES_NET, d, *number_cnn_dense)

    # apply strategies to resnet
    resnet.addStrategy(underSampling)
    resnet.addStrategy(data_aug)

    # definition of args to pass to template_method (conv's number of filters, dense neurons and batch size)
    resnet_args = (
        48,  # number of filters of initial CNN layer
        4,  # number of consecutive conv+identity blocks
        1,  # number of identity-block repetitions per stage; the resnet-18 default is 1 (1 conv block + 1 identity block)
        8,  # growth rate
        config.BATCH_SIZE_ALEX_AUG,  # batch size
    )

    # apply build, train and predict
    #model, predictions, history = resnet.template_method(*resnet_args)
    ##resnet.save(model, config.RES_NET_WEIGHTS_FILE)

    # print final results
    #config_func.print_final_results(y_test=d.y_test, predictions=predictions, history=history, dict=False)

    ## ---------------------------DENSENET APPLICATION ------------------------------------

    # TUPLE DEFINITIONS
    numberLayers = (
        4,  #BLOCKS
        1  #DENSE LAYERS
    )

    valuesLayers = (
        24,  # initial number of Feature Maps
        5,  # number of dense blocks
        2,  # number of layers in each block
        12,  # growth rate
        0.5,  # compression rate
        config.BATCH_SIZE_ALEX_AUG  # batch size
    )

    densenet = factoryModel.getModel(config.DENSE_NET, d, *numberLayers)

    densenet.addStrategy(underSampling)
    densenet.addStrategy(data_aug)

    #model, predictions, history = densenet.template_method(*valuesLayers)

    #config_func.print_final_results(d.y_test, predictions, history)

    ## ------------------------PSO OPTIMIZATION ------------------------------------------

    #PSO OPTIMIZATION
    optFact = OptimizerFactory.OptimizerFactory()

    # define the PSO optimizers for each model
    pso_alex = optFact.createOptimizer(config.PSO_OPTIMIZER, alexNetModel,
                                       *config.pso_init_args_alex)
    pso_vgg = optFact.createOptimizer(config.PSO_OPTIMIZER, vggNetModel,
                                      *config.pso_init_args_vgg)
    pso_res = optFact.createOptimizer(config.PSO_OPTIMIZER, resnet,
                                      *config.pso_init_args_resnet)
    pso_dense = optFact.createOptimizer(config.PSO_OPTIMIZER, densenet,
                                        *config.pso_init_args_densenet)

    # call optimize function
    cost, pos, optimizer = pso_alex.optimize()

    # plot cost history and position history
    print("Cost: {}".format(cost))
    config_func.print_Best_Position_PSO(pos, config.ALEX_NET)  # print best position
    pso_alex.plotCostHistory(optimizer=optimizer)
    pso_alex.plotPositionHistory(optimizer, np.array(config.X_LIMITS),
                                 np.array(config.Y_LIMITS), config.POS_VAR_EXP,
                                 config.LABEL_X_AXIS, config.LABEL_Y_AXIS)