Example #1
def run_lr(X, y, kf=None):
    tag = "Acc/LR"

    clf_cv = LogisticRegression(max_iter=100000)

    get_prediction_kf(kf, clf_cv, X, y, tag)
    log.info("")
Example #2
def test_model(model, test_loader, iterator=0):
    correct = 0
    total = len(test_loader.dataset)

    prediction_list, label_list = [], []

    # set the model into evaluation mode
    model = model.eval()

    # disable gradient tracking for inference; eval() above already makes
    # batch norm use its running statistics, so it is not sensitive to batch size
    with torch.no_grad():
        # Iterate through test set mini batches
        for images, labels in tqdm(test_loader):
            # Forward pass
            inputs = images.to(device)
            labels = labels.to(device)
            y = model(inputs)

            predictions = torch.argmax(y, dim=1)
            prediction_list.extend([p.item() for p in predictions])
            label_list.extend([l.item() for l in labels])

            truths = torch.sum((predictions == labels).float()).item()
            correct += truths

    acc = (correct / total)
    log.info('\nTest accuracy: {}'.format(acc))
    if iterator != 0:
        writer.add_scalar(MODEL_NAME[0] + "/Acc/Validation", acc, iterator)

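    # the hand-drawn 2x2 table below assumes a binary classification problem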
    conf_matrix = confusion_matrix(label_list, prediction_list)
    log.info("Confusion Matrix:\n_________\n| {} | {} | \n|---|---|\n| {} | {} |\n_________"
             .format(conf_matrix[0, 0], conf_matrix[0, 1], conf_matrix[1, 0], conf_matrix[1, 1]))

    return 100 * acc
Example #3
def run_svm(X, y, kf=None):
    tag = "Acc/SVM"

    svc_cv = SVC(probability=True)

    get_prediction_kf(kf, svc_cv, X, y, tag)
    log.info("")
Example #4
def run_knn(X, y, kf=None):
    tag = "Acc/KNN"

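    # k is set to the number of distinct class labels in y (2 for binary data)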
    neigh_cv = KNeighborsClassifier(n_neighbors=len(set(y)))

    get_prediction_kf(kf, neigh_cv, X, y, tag)
    log.info("")
Example #5
def main(model_name, dataset_folder, seed, cv=5, img_size=112, normalize=True):

    kf = KFold(n_splits=cv, shuffle=True, random_state=seed)

    log.info("Constructing datasets and arrays")
    X, y = get_dataset(dataset_folder, img_size, normalize, divide=False)

    log.info("Calling the model: " + model_name)
    run_model(model_name, X, y, kf)

    collect_garbage()
Example #6
def validate_model(model, test_loader, metric, iterator, save):
    correct = 0
    total = len(test_loader.dataset)

    # set the model into evaluation mode
    model = model.eval()
    metric = metric.eval()

    # disable gradient tracking for inference; eval() above already makes
    # batch norm use its running statistics, so it is not sensitive to batch size
    with torch.no_grad():
        # Iterate through test set mini batches
        for images, labels in tqdm(test_loader):
            # Forward pass
            inputs = images.to(device)
            labels = labels.to(device)
            outputs = model(inputs)

            predictions = torch.argmax(outputs, dim=1)
            truths = torch.sum((predictions == labels).float()).item()
            correct += truths

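            # note: only the final mini-batch's loss survives the loop and is logged below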
            loss = metric(outputs, labels).item()

    val_acc = correct / total
    writer.add_scalar(MODEL_NAME[0] + "/Loss/Validation", loss, iterator)
    writer.add_scalar(MODEL_NAME[0] + "/Acc/Validation", val_acc, iterator)
    log.info("{}th Validation --> Loss: {} - Accuracy: {}".format(
        iterator, round(loss, 6), round(val_acc, 6)))

    if save and is_verified(100 * val_acc):
        exist_files = path_exists(ROOT_DIR, SAVE_FILE[0], "contains")

        better = len(exist_files) == 0
        if not better:
            exist_acc = []
            for file in exist_files:
                exist_acc.append(float(file.split("_")[0].replace(",", ".")))
            better = all(100 * val_acc > acc for acc in exist_acc)

        if better:
            save_model(model=model,
                       path=str(round(100 * val_acc, 2)) + "_" + SAVE_FILE[0])
Example #7
def weighted_model(model_name, pretrain_file, use_actual_num_classes=False):
    out_file = ROOT_DIR + "/" + pretrain_file + ".pth"

    if model_name == models.alexnet.__name__:
        model = models.alexnet(
            num_classes=2 if use_actual_num_classes else 1000)

    elif model_name == models.resnet18.__name__:
        model = models.resnet18(
            num_classes=2 if use_actual_num_classes else 1000)

    elif model_name == models.resnet50.__name__:
        model = models.resnet50(
            num_classes=2 if use_actual_num_classes else 1000)

    elif model_name == models.resnet152.__name__:
        model = models.resnet152(
            num_classes=2 if use_actual_num_classes else 1000)

    elif model_name == models.vgg16.__name__:
        model = models.vgg16(num_classes=2 if use_actual_num_classes else 1000)

    elif model_name == models.vgg19.__name__:
        model = models.vgg19(num_classes=2 if use_actual_num_classes else 1000)

    elif model_name == models.densenet169.__name__:
        model = models.densenet169(
            num_classes=2 if use_actual_num_classes else 1000)

    else:
        log.fatal("model name is not known: " + model_name)
        sys.exit(1)

    try:
        log.info("Using class size as: {}".format(
            2 if use_actual_num_classes else 1000))
        return load_model(model, out_file)
    except RuntimeError as re:
        log.error(re)
        return weighted_model(model_name, pretrain_file, True)
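The try/except above gives the loader a fallback: the architecture is first built with the default 1000-class ImageNet head, and if load_model raises a RuntimeError (a shape mismatch because the checkpoint was saved with the task-specific 2-class head), the call retries with use_actual_num_classes=True. A minimal call, reusing the checkpoint name that appears in Example #11:

model = weighted_model("resnet18", "84.35_PreTrained_resnet18_Adam_dataset_out")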
Example #8
def run_model(model_name, X, y, kf):
    collect_garbage()

    if model_name == "svm":
        run_svm(X=X, y=y, kf=kf)

    elif model_name == "lr":
        run_lr(X=X, y=y, kf=kf)

    elif model_name == "knn":
        run_knn(X=X, y=y, kf=kf)

    elif model_name == "all":
        log.info("Running ML model: svm")
        run_svm(X=X, y=y, kf=kf)

        log.info("Running ML model: lr")
        run_lr(X=X, y=y, kf=kf)

        log.info("Running ML model: knn")
        run_knn(X=X, y=y, kf=kf)

    else:
        log.fatal("ML model name is not known: " + model_name)
        sys.exit(1)
Example #9
def get_dataset(dataset_folder, img_size, normalize, divide=False):
    log.info("Reading dataset")
    X, y = read_dataset(dataset_folder=dataset_folder,
                        resize_value=(img_size, img_size),
                        to_crop=True)

    if normalize:
        X = StandardScaler().fit_transform(X)

    if divide:
        log.info("Dividing dataset into train and test data")
        X_tr, y_tr, X_ts, y_ts = divide_dataset(X, y)
        log.info("Train data length: %d" % len(y_tr))
        log.info("Test data length: %d" % len(y_ts))

        return X_tr, y_tr, X_ts, y_ts

    return X, y
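Note that StandardScaler().fit_transform expects a 2-D (n_samples, n_features) array, so read_dataset (not shown in this listing) presumably returns each image flattened into a single row. A sketch of that assumption:

import numpy as np

# hypothetical: images is a list of equally sized image arrays
X = np.asarray(images, dtype=np.float32).reshape(len(images), -1)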
Example #10
def main(save=False,
         dataset_folder="dataset",
         batch_size=20,
         img_size=112,
         test_without_train=False,
         pretrain_file=None,
         num_workers=4,
         model_name='alexnet',
         optimizer_name='Adam',
         is_pre_trained=False,
         fine_tune=False,
         num_epochs=18,
         update_lr=True,
         normalize=None,
         validation_freq=0.1,
         lr=0.001,
         momentum=0.9,
         partial=0.125,
         betas=(0.9, 0.99),
         weight_decay=0.025):
    if not is_pre_trained and fine_tune:
        fine_tune = False

    if test_without_train and pretrain_file is None:
        log.fatal(
            "A pretrained weight file is required when testing without training")
        sys.exit(1)

    log.info("Constructing datasets and loaders")
    train_data, train_loader, test_data, test_loader = set_dataset_and_loaders(
        dataset_folder, batch_size, img_size, num_workers, normalize)

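    # log three sample images from each class (0 and 1) to TensorBoard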
    set_0, set_1 = 0, 0
    for imgs, labels in test_loader:
        if set_0 == 3 and set_1 == 3:
            break

        for e, label in enumerate(labels.tolist()):
            if label == 0 and set_0 != 3:
                writer.add_image(
                    "{} - class image sample {}".format(
                        train_data.classes[0], set_0),
                    inv_normalize_tensor(imgs[e], normalize))
                set_0 += 1
            elif label == 1 and set_1 != 3:
                writer.add_image(
                    "{} - class image sample {}".format(
                        train_data.classes[1], set_1),
                    inv_normalize_tensor(imgs[e], normalize))
                set_1 += 1

            if set_0 == 3 and set_1 == 3:
                break

    log.info("Calling the model: " + model_name)
    if test_without_train:
        model = weighted_model(model_name, pretrain_file)
        test_model(model, test_loader, 0)

    else:
        run_model(model_name=model_name,
                  optimizer_name=optimizer_name,
                  is_pre_trained=is_pre_trained,
                  fine_tune=fine_tune,
                  train_loader=train_loader,
                  test_loader=test_loader,
                  num_epochs=num_epochs,
                  save=save,
                  update_lr=update_lr,
                  dataset_folder=dataset_folder,
                  validation_freq=validation_freq,
                  lr=lr,
                  momentum=momentum,
                  partial=partial,
                  betas=betas,
                  weight_decay=weight_decay)

    collect_garbage()
    writer.close()
Example #11
if __name__ == '__main__':
    save = False
    log.info("Process Started")
    main(model_name="resnet18",
         is_pre_trained=True,
         pretrain_file="84.35_PreTrained_resnet18_Adam_dataset_out",
         img_size=112,
         test_without_train=True)
    log.info("Process Finished")
Example #12
def main(transfer_learning, method="", ml_model_name="", cv=10, dataset_folder="dataset",
         pretrain_file=None, batch_size=8, img_size=112, num_workers=4, cnn_model_name="", optimizer_name='Adam',
         validation_freq=0.1, lr=0.001, momentum=0.9, partial=0.125, betas=(0.9, 0.99), weight_decay=0.025,
         update_lr=True, is_pre_trained=False, fine_tune=False, num_epochs=16, normalize=True, seed=17):

    if not transfer_learning:
        if method.lower() == "ml":
            run_ML.main(model_name=ml_model_name, dataset_folder=dataset_folder, seed=seed, cv=cv,
                        img_size=img_size, normalize=normalize)
        elif method.lower() == "cnn":
            run_CNN.main(save=False, dataset_folder=dataset_folder, batch_size=batch_size, test_without_train=False,
                         img_size=img_size, num_workers=num_workers, num_epochs=num_epochs, model_name=cnn_model_name,
                         optimizer_name=optimizer_name, is_pre_trained=is_pre_trained, fine_tune=fine_tune,
                         update_lr=update_lr, normalize=normalize, validation_freq=validation_freq, lr=lr,
                         momentum=momentum, partial=partial, betas=betas, weight_decay=weight_decay)
        else:
            log.fatal("method name is not known: " + method)
            sys.exit(1)

    else:
        log.info("Constructing datasets and loaders")
        train_data, train_loader, test_data, test_loader = set_dataset_and_loaders(dataset_folder=dataset_folder,
                                                                                   batch_size=batch_size,
                                                                                   img_size=img_size,
                                                                                   num_workers=num_workers,
                                                                                   normalize=normalize)

        if is_pre_trained and pretrain_file is not None and \
                cnn_model_name in pretrain_file.lower():
            log.info("Getting PreTrained CNN model: " + cnn_model_name + " from the Weights of " + pretrain_file)
            model = cnn_model.weighted_model(cnn_model_name, pretrain_file)

        else:
            log.info("Running CNN model: " + cnn_model_name)
            model = cnn_model.run_model(model_name=cnn_model_name, optimizer_name=optimizer_name, fine_tune=fine_tune,
                                        is_pre_trained=is_pre_trained, train_loader=train_loader, num_epochs=num_epochs,
                                        test_loader=test_loader, validation_freq=validation_freq, lr=lr,
                                        momentum=momentum, partial=partial, betas=betas, weight_decay=weight_decay,
                                        update_lr=update_lr, save=False, dataset_folder=dataset_folder)

        log.info("Feature extractor is being created")
        feature_extractor = get_feature_extractor(cnn_model_name, model.eval())
        log.info("Feature extractor is setting to device: " + str(device))
        feature_extractor = feature_extractor.to(device)

        log.info("Merging CNN train&test datasets")
        dataset = train_data + test_data

        log.info("Constructing loader for merged dataset")
        data_loader = set_loader(dataset=dataset, batch_size=int(len(dataset) / 5), shuffle=False,
                                 num_workers=num_workers)
        log.info("Extracting features as X_cnn array and labels as general y vector")
        X_cnn, y = extract_features(data_loader, feature_extractor)
        class_dist = {label: y.count(label) for label in set(y)}
        class0_size = class_dist[0]
        class1_size = class_dist[1]
        log.info("Total class 0 size: " + str(class0_size))
        log.info("Total class 1 size: " + str(class1_size))

        if normalize:
            X_cnn = Normalizer().fit_transform(X_cnn)
        X_cnn = StandardScaler().fit_transform(X_cnn)

        log.info("Number of features in X_cnn: " + str(len(X_cnn[0])))

        kf = KFold(n_splits=cv, shuffle=True, random_state=seed)

        ml_model.run_model(ml_model_name, X_cnn, y, kf)

    collect_garbage()
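get_feature_extractor is not shown in this listing. For the torchvision models used here it is commonly built by dropping the final classification layer; a sketch under that assumption, not the project's actual implementation:

import torch.nn as nn

def get_feature_extractor_sketch(model):
    # keep every child module except the last one (the classification head)
    # and flatten the remaining feature maps into one vector per sample
    return nn.Sequential(*list(model.children())[:-1], nn.Flatten())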
Example #13
def run_model(model_name,
              optimizer_name,
              is_pre_trained,
              fine_tune,
              train_loader,
              test_loader,
              validation_freq,
              lr,
              momentum,
              partial,
              betas,
              weight_decay,
              update_lr=True,
              num_epochs=25,
              save=False,
              dataset_folder="dataset"):
    collect_garbage()

    MODEL_NAME[0] = model_name

    num_classes = len(train_loader.dataset.classes)

    log.info("Instantiate the model")
    if model_name == models.alexnet.__name__:
        model = prepare_alexnet(is_pre_trained, fine_tune, num_classes)

    elif model_name in (models.resnet18.__name__, models.resnet50.__name__,
                        models.resnet152.__name__):
        model = prepare_resnet(model_name, is_pre_trained, fine_tune,
                               num_classes)

    elif model_name in (models.vgg16.__name__, models.vgg19.__name__):
        model = prepare_vgg(model_name, is_pre_trained, fine_tune, num_classes)

    elif model_name == models.densenet169.__name__:
        model = prepare_densenet(is_pre_trained, fine_tune, num_classes)

    else:
        log.fatal("model name is not known: " + model_name)
        sys.exit(1)

    log.info("Setting the model to device")
    model = model.to(device)

    if "densenet" not in model_name:
        log.info("The summary:")
        get_summary(model, train_loader)

    collect_garbage()

    log.info("Setting the loss function")
    metric = nn.CrossEntropyLoss()

    model_parameters = get_grad_update_params(model, fine_tune)

    if optimizer_name == optim.Adam.__name__:
        optimizer = optim.Adam(model_parameters, lr=lr)
    elif optimizer_name == optim.SGD.__name__:
        optimizer = optim.SGD(model_parameters, lr=lr, momentum=momentum)
    elif optimizer_name == padam.Padam.__name__:
        optimizer = padam.Padam(model_parameters,
                                lr=lr,
                                partial=partial,
                                weight_decay=weight_decay,
                                betas=betas)
    else:
        log.fatal("not implemented optimizer name: {}".format(optimizer_name))
        sys.exit(1)

    log.info("Setting the optimizer as: {}".format(optimizer_name))

    SAVE_FILE[0] = (
        "" if not is_pre_trained else "PreTrained_"
    ) + model_name + "_" + optimizer_name + "_" + dataset_folder + "_out.pth"

    last_val_iterator = train_model(model,
                                    train_loader,
                                    test_loader,
                                    metric,
                                    optimizer,
                                    lr=lr,
                                    num_epochs=num_epochs,
                                    update_lr=update_lr,
                                    validation_freq=validation_freq,
                                    save=save)

    log.info("Testing the model")
    test_acc = test_model(model, test_loader, last_val_iterator)

    if save and is_verified(test_acc):
        exist_files = path_exists(ROOT_DIR, SAVE_FILE[0], "contains")

        better = len(exist_files) == 0
        if not better:
            exist_acc = []
            for file in exist_files:
                exist_acc.append(float(file.split("_")[0].replace(",", ".")))
            better = all(test_acc > acc for acc in exist_acc)
        if better:
            save_model(model=model,
                       path=str(round(test_acc, 2)) + "_" + SAVE_FILE[0])

    return model
Example #14
def get_prediction_kf(kf, model, X, y, tag=None):
    cv = kf.n_splits
    ratios = []
    conf_matrices = []
    roc_list = []
    # convert once, before the fold loop, instead of on every iteration
    if not isinstance(X, np.ndarray):
        X = np.array(X)
    y = np.array(y)

    for e, (train, test) in enumerate(kf.split(X, y)):
        X_train, X_test = X[train], X[test]
        y_train, y_test = y[train], y[test]

        model.fit(X_train, y_train)
        success_ratio = model.score(X_test, y_test)
        log.info(
            str(cv) + "-Fold CV -- Iteration " + str(e) +
            " Test Success Ratio: " + str(100 * success_ratio) + "%")
        ratios.append(success_ratio)

        test_prob = model.predict_proba(X_test)
        auc = roc_auc_score(y_test, test_prob[:, 1])
        log.info(
            str(cv) + "-Fold CV -- Iteration " + str(e) + " AUC Score: " +
            str(auc))
        roc_list.append(auc)

        conf_matrix = confusion_matrix(y_test.tolist(),
                                       model.predict(X_test).tolist())
        log.info(
            str(cv) + "-Fold CV -- Iteration " + str(e) +
            " Confusion Matrix:\n" + str(conf_matrix))
        conf_matrices.append(conf_matrix)

        if tag is not None:
            writer.add_scalar(tag, success_ratio, e)

    log.info(
        str(cv) + "-Fold CV Average Test Success Ratio: " +
        str(100 * np.average(np.array(ratios))) + "%")
    log.info(
        str(cv) + "-Fold CV Average AUC Score: " +
        str(np.average(np.array(roc_list))))
    log.info(
        str(cv) + "-Fold CV Average Confusion Matrix:\n" +
        str(np.mean(conf_matrices, axis=0)))
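A self-contained way to exercise get_prediction_kf, using only scikit-learn; the synthetic data below is illustrative and not part of the project:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

X, y = make_classification(n_samples=200, n_features=20, random_state=17)
kf = KFold(n_splits=5, shuffle=True, random_state=17)
# tag=None skips the TensorBoard writer, so only log output is produced
get_prediction_kf(kf, LogisticRegression(max_iter=100000), X, y, tag=None)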
Example #15
def get_prediction(model, X_ts, y_ts):
    log.info("Test Success Ratio: " + str(100 * model.score(X_ts, y_ts)) + '%')
Example #16
import os

import torch

from util.logger_util import log

ROOT_DIR = os.path.dirname(os.path.abspath(__file__))


# os.environ["KERAS_BACKEND"] = "plaidml.keras.backend"
# log.info("envion is set as: %s" % str(os.environ["KERAS_BACKEND"]))

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
log.info("Device is selected as %s" % device)
SAVE_FILE = [""]
MODEL_NAME = [""]

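SAVE_FILE and MODEL_NAME are single-element lists rather than plain strings so that importing modules can mutate the shared value in place (as in MODEL_NAME[0] = model_name in Example #13); rebinding a plain module-level string from another module would not be visible to other importers. For example, assuming this file is importable as config (the module name is hypothetical):

from config import MODEL_NAME
MODEL_NAME[0] = "resnet18"  # now visible to every module that imported the list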
Example #17
def set_dataset_and_loaders(dataset_folder,
                            batch_size,
                            img_size,
                            num_workers,
                            normalize=None):

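    # assumes this module lives under a "cnn" directory; take the project root above it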
    dataset_dir = ROOT_DIR.split("cnn")[0]

    log.info("Setting train data")
    train_data = set_dataset(folder=dataset_dir + dataset_folder + '/train',
                             size=img_size,
                             normalize=normalize)
    log.info("Train data length: %d" % len(train_data))
    log.info("Setting test data")
    test_data = set_dataset(folder=dataset_dir + dataset_folder + '/test',
                            size=img_size,
                            normalize=normalize)
    log.info("Test data length: %d" % len(test_data))

    log.info("Setting train loader")
    train_loader = set_loader(dataset=train_data,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=num_workers)
    log.info("Setting test loader")
    test_loader = set_loader(dataset=test_data,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=num_workers)

    return train_data, train_loader, test_data, test_loader
Example #18
def train_model(model,
                train_loader,
                test_loader,
                metric,
                optimizer,
                lr,
                validation_freq,
                save,
                num_epochs=25,
                update_lr=False):
    total_loss_history = []
    total_acc_history = []
    validate_every = max(1, math.floor(num_epochs * validation_freq))
    last_validate_iter = 0

    log.info("Training the model")
    # Iterate through train set mini batches
    for epoch in trange(num_epochs):
        correct = 0
        total = len(train_loader.dataset)
        update = update_lr
        loss_history = []
        for e, (images, labels) in enumerate(train_loader):  # wrap in tqdm for a progress bar
            # zero the parameter gradients
            optimizer.zero_grad()

            inputs = images.to(device)
            labels = labels.to(device)

            # Do the forward pass
            outputs = model(inputs)

            predictions = torch.argmax(outputs, dim=1)
            truths = torch.sum((predictions == labels).float())
            correct += truths.item()

            loss = metric(outputs, labels)
            loss_history.append(loss.item())

            if update \
                    and (epoch != 0 and epoch != num_epochs - 1) \
                    and e == len(train_loader) - 1 \
                    and (epoch + 1) % max(1, num_epochs // 4) == 0:
                update = False
                lr = lr / 10
                log.info("learning rate is updated to " + str(lr))
                # update the lr in place so the optimizer keeps its state;
                # rebuilding optim.Adam from optimizer.param_groups would keep
                # the old per-group lr and silently swap SGD/Padam for Adam
                for param_group in optimizer.param_groups:
                    param_group["lr"] = lr

            # Calculate gradients and step
            loss.backward()
            optimizer.step()

        log.info("\nIteration number on epoch %d / %d is %d" %
                 (epoch + 1, num_epochs, len(loss_history)))
        epoch_loss = sum(loss_history) / len(loss_history)
        writer.add_scalar(MODEL_NAME[0] + "/Loss/Train", epoch_loss, epoch)
        total_loss_history.append(epoch_loss)
        epoch_acc = correct / total
        writer.add_scalar(MODEL_NAME[0] + "/Acc/Train", epoch_acc, epoch)
        total_acc_history.append(epoch_acc)
        log.info("Epoch {} --> training loss: {} - training acc: {}".format(
            epoch + 1, round(epoch_loss, 4), round(epoch_acc, 4)))

        if epoch % validate_every == 0 and epoch != (num_epochs - 1):
            last_validate_iter = int(epoch / validate_every)
            validate_model(model, test_loader, metric, last_validate_iter,
                           save)
            model = model.train()
            metric = metric.train()

    log.info("\nTotal training iteration: %d" % len(total_loss_history))
    total_loss = sum(total_loss_history) / len(total_loss_history)
    total_acc = sum(total_acc_history) / len(total_acc_history)
    log.info("Average --> training loss: {} - training acc: {} ".format(
        round(total_loss, 6), round(total_acc, 6)))

    return last_validate_iter
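A note on the learning-rate update inside the training loop above: Adam keeps per-parameter moment estimates, so mutating param_group["lr"] in place preserves that state, whereas constructing a fresh optimizer would discard it (and would silently replace SGD or Padam with Adam mid-training).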