Example #1
def main():
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")

    # make directory to save train history and model
    os.makedirs(args.result_dir, exist_ok=True)
    args2json(args, args.result_dir)

    # load dataset and create train/test loaders
    D = LoadDataset(args.data_dir, args.batch_size_train, args.batch_size_test)
    train_loader, test_loader = D()

    # model, loss_function, optimizer
    model = Net().to(device)
    loss_function = CrossEntropy()
    optimizer = torch.optim.Adam(model.parameters(), weight_decay=0.01)

    # train and test
    history = []
    for e in range(args.epochs):
        train_loss = train(model, device, train_loader, optimizer,
                           loss_function)
        test_loss, acc = test(model, device, test_loader, loss_function)
        history.append([train_loss, test_loss, acc])
        show_progress(e + 1, args.epochs, train_loss, test_loss, acc)

    # save train history and model
    save_history(history, args.result_dir)
    save_model(model, args.result_dir)
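
Example #1 calls train() and test() helpers that the excerpt does not include. Below is a minimal sketch of what they might look like: only the signatures and return values follow from the call sites above, and the bodies assume a standard PyTorch classification loop with torch imported.

def train(model, device, train_loader, optimizer, loss_function):
    # one pass over the training set; returns the mean batch loss
    model.train()
    total_loss = 0.0
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        loss = loss_function(model(data), target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(train_loader)

def test(model, device, test_loader, loss_function):
    # evaluation pass; returns mean loss and top-1 accuracy
    model.eval()
    total_loss, correct = 0.0, 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += loss_function(output, target).item()
            correct += (output.argmax(1) == target).sum().item()
    return total_loss / len(test_loader), correct / len(test_loader.dataset)

Example #2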
def anomaly():
    """
    Testing models ability to find data anomalies
    !!! not working in current version
    """

    # Load anomaly dataset
    anomaly_data = LoadDataset("dataset/kaggle_anomalies/", 0)
    anomaly_data, anomaly_label, val, val_label = anomaly_data.load_data()

    # offset anomaly labels so they do not collide with the normal class labels
    for i in range(len(anomaly_label)):
        anomaly_label[i] = anomaly_label[i] + 5

    # Concatenate test and anomaly data. The excerpt never defines
    # test_data/test_label, so they are assumed to be loaded the same way as
    # in the other examples:
    test_data = LoadDataset("dataset/kaggle_original_train/", 0)
    test_data, test_label, val, val_label = test_data.load_data()
    test_anomaly_data = np.vstack((test_data, anomaly_data))
    test_anomaly_label = np.hstack((test_label, anomaly_label))
    """# Get k-means cluster distance
def getDECNetworkResults(dec, enc):
    #Load test dataset
    test_data = LoadDataset("dataset/kaggle_original_train/", 0)
    test_data, test_label, val, val_label = test_data.load_data()

    big_data = LoadDataset("dataset/kaggle_augmented_train_new/", 0)
    big_data, _, _, _ = big_data.load_data()

    # make save directory
    os.makedirs("dec", exist_ok=True)
    os.chdir("dec")

    encoded = enc.predict(test_data)
    q, _ = dec.predict(test_data, verbose=0)
    y_pred = q.argmax(1)

    print(y_pred)
    print(confusion_matrix(test_label.astype(np.int64), y_pred))

    # Measure prediction time on increasingly large slices of the data
    for i in range(20):
        iterate = 5000 * (i + 1)
        data = big_data[0:iterate, :]
        print(data.shape)
        print("KMEAN")
        start = time.time()
        q, _ = dec.predict(data, verbose=0)
        y_pred = q.argmax(1)
        end = time.time()
        print(end - start)

    # reshape the flattened test images back to 64x64 for plotting
    train_x = np.reshape(test_data, (3720, 64, 64))

    TSNE = TSNEAlgo()
    TSNE.tsne_fit(encoded, perplexity=35)

    TSNE.tsne_plot(train_x,
                   y_pred.astype(int),
                   save_name="Pred",
                   save_data_dir="dec")
    TSNE.tsne_plot(train_x,
                   test_label.astype(int),
                   save_name="True",
                   save_data_dir="dec")
Example #4
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

from explainer_tabular import LimeTabularExplainer
from load_dataset import LoadDataset

test = LoadDataset(which='ildp')
X = test.data.data

feature_names = test.data.feature_names
target_names = test.data.target_names

# train, test, labels_train, labels_test = train_test_split(test.data.data, test.data.target, train_size=0.80)
# np.save("X_train_ildp.npy", train)
# np.save("X_test_ildp.npy", test)
# np.save("y_train_ildp.npy", labels_train)
# np.save("y_test_ildp.npy", labels_test)

train = np.load("data/X_train_ildp.npy")
test = np.load("data/X_test_ildp.npy")
labels_train = np.load("data/y_train_ildp.npy")
labels_test = np.load("data/y_test_ildp.npy")

rf = RandomForestClassifier(n_estimators=10, random_state=0)
rf.fit(train, labels_train)
i = np.random.randint(0, test.shape[0])
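
A hedged sketch of how this excerpt likely continues: LimeTabularExplainer is imported from the project-local explainer_tabular module, so the constructor and explain_instance signatures below are assumptions modelled on the standard lime package API.

explainer = LimeTabularExplainer(train,
                                 feature_names=feature_names,
                                 class_names=target_names,
                                 discretize_continuous=True)
# explain the randomly drawn test row using the forest's class probabilities
exp = explainer.explain_instance(test[i], rf.predict_proba, num_features=5)
exp.as_pyplot_figure()
plt.show()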
Example #5
def train(net, device, epochs=1000, batch_size=1, lr=0.001):
    dataset = LoadDataset(dir_img, dir_mask, mask_suffix='_mask')
    n_val = int(len(dataset) * 0.2)
    n_train = len(dataset) - n_val
    train_set, val_set = random_split(dataset, [n_train, n_val])
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=8,
                              pin_memory=True)
    val_loader = DataLoader(val_set,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=8,
                            pin_memory=True,
                            drop_last=True)

    writer = SummaryWriter(comment=f'Lr:{lr}___BS:{batch_size}')
    global_step = 0
    optimizer = optim.RMSprop(net.parameters(), lr=lr, weight_decay=1e-8)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min' if net.n_classes > 1 else 'max', patience=2)
    if net.n_classes > 1:
        loss_func = nn.CrossEntropyLoss()
    else:
        loss_func = nn.BCEWithLogitsLoss()

    for epoch in range(epochs):
        net.train()
        epoch_loss = 0
        with tqdm(total=n_train,
                  desc='Epoch: {}/{}'.format((epoch + 1), epochs),
                  unit='img') as pbar:
            for batch in train_loader:
                imgs = batch['image']
                true_mask = batch['mask']
                imgs = imgs.to(device=device, dtype=torch.float32)
                assert imgs.shape[1] == net.n_channels, \
                    f'Network has been defined with {net.n_channels} input channels, ' \
                    f'but loaded images have {imgs.shape[1]} channels. Please check that ' \
                    'the images are loaded correctly.'

                if net.n_classes == 1:
                    mask_type = torch.float32
                else:
                    mask_type = torch.long
                true_mask = true_mask.to(device=device, dtype=mask_type)

                mask_pred = net(imgs)
                loss = loss_func(mask_pred, true_mask)
                epoch_loss = epoch_loss + loss.item()

                writer.add_scalar('Loss/train', loss.item(), global_step)
                pbar.set_postfix(**{'loss (batch)': loss.item()})

                optimizer.zero_grad()
                loss.backward()
                nn.utils.clip_grad_value_(net.parameters(), 0.1)
                optimizer.step()

                pbar.update(imgs.shape[0])
                global_step = global_step + 1

                # roughly ten times per epoch: log histograms and validate
                if global_step % (n_train // (10 * batch_size)) == 0:
                    for tag, value in net.named_parameters():
                        tag = tag.replace('.', '/')
                        writer.add_histogram('weights/' + tag,
                                             value.detach().cpu().numpy(),
                                             global_step)
                        writer.add_histogram('grads/' + tag,
                                             value.grad.detach().cpu().numpy(),
                                             global_step)
                    val_score = eval_net(net, val_loader, device)
                    scheduler.step(val_score)
                    writer.add_scalar('learning_rate',
                                      optimizer.param_groups[0]['lr'],
                                      global_step)

                    if net.n_classes > 1:
                        logging.info(
                            'Validation cross entropy: {}'.format(val_score))
                        writer.add_scalar('Loss/test', val_score, global_step)
                    else:
                        logging.info(
                            'Validation Dice Coeff: {}'.format(val_score))
                        writer.add_scalar('Dice/test', val_score, global_step)

                    writer.add_images('images', imgs, global_step)
                    if net.n_classes == 1:
                        writer.add_images('masks/true', true_mask, global_step)
                        writer.add_images('masks/pred',
                                          torch.sigmoid(mask_pred) > 0.5,
                                          global_step)

        os.makedirs(dir_checkpoint, exist_ok=True)
        torch.save(net.state_dict(),
                   dir_checkpoint + 'Epoch {}.pth'.format(epoch + 1))
    writer.close()
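Example #6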
def getNetworkResults(model, model_type):
    """
    Main result function
    """

    # Specify which results to produce
    plot_input_output = False
    plot_data = False
    plot_class_activation_map = False
    plot_activation_map = False
    plot_kernel_inspection = False
    hierarchical_clustering = False
    kmean_cluster = True
    spectral_cluster = True
    pred_time = False

    # Load test dataset
    test_data = LoadDataset("dataset/kaggle_original_train/", 0)
    test_data, test_label, val, val_label = test_data.load_data()

    # make save directory
    os.makedirs(model_type, exist_ok=True)
    os.chdir(model_type)

    # get autoencoder, encoder and decoder
    AUTO, ENC, DEC = model.getModel()

    # Predict encoder output
    encoded = ENC.predict(test_data)

    # Fit the t-SNE algorithm
    TSNE = TSNEAlgo()
    TSNE.tsne_fit(encoded, perplexity=35)

    if plot_input_output:
        # Visualize input and output from the autoencoder
        visualize_input_output(AUTO, test_data, model_type)

    if model_type == "globalAverage" and plot_class_activation_map:
        visualize_class_activation_map(model, test_data)

    if plot_activation_map:
        visualize_activation_map(model, test_data)

    if plot_data:
        # Plot the data using the fitted t-SNE embedding
        TSNE.tsne_plot(test_data, test_label, model_type, model_type)

    if plot_kernel_inspection:
        kernel_inspection(model, test_data)

    if hierarchical_clustering:
        hierarchical(ENC,
                     TSNE,
                     test_data,
                     test_label,
                     save_name="hierarchical.png")

    if kmean_cluster:
        kmean(ENC, TSNE, test_data, test_label)

    if spectral_cluster:
        spectral(ENC, TSNE, test_data, test_label)

    if pred_time:
        os.chdir("..")
        data = LoadDataset("dataset/kaggle_augmented_train_new/", 0)
        data, test_label, val, val_label = data.load_data()
        os.chdir(model_type)
        predictionTime(ENC, data)
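Example #7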
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as shc
from sklearn.datasets import load_breast_cancer

from load_dataset import LoadDataset

bc_data = LoadDataset(which='bc')
ildp_data = LoadDataset(which='ildp')
hp_data = LoadDataset(which='hp')

plt.figure(figsize=(5, 4))
# plt.title("Dendrograms")
clust = shc.linkage(bc_data.data.data, method='ward')
dend = shc.dendrogram(clust)
filename = 'results/dendrogram_bc.pdf'
plt.savefig(filename, bbox_inches='tight')
plt.show()

plt.figure(figsize=(5, 4))
# plt.title("Dendrograms")
clust = shc.linkage(hp_data.data.data, method='ward')
dend = shc.dendrogram(clust)
filename = 'results/dendrogram_hp.pdf'
plt.savefig(filename, bbox_inches='tight')
plt.show()

plt.figure(figsize=(5, 4))
# plt.title("Dendrograms")
clust = shc.linkage(ildp_data.data.data, method='ward')
dend = shc.dendrogram(clust)
filename = 'results/dendrogram_ildp.pdf'
plt.savefig(filename, bbox_inches='tight')
plt.show()
Example #8
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.neural_network import MLPClassifier

from explainer_tabular import LimeTabularExplainer
from load_dataset import LoadDataset

test = LoadDataset(which='bc')
X = test.data.data

feature_names = test.data.feature_names
target_names = test.data.target_names

# train, test, labels_train, labels_test = train_test_split(test.data.data, test.data.target, train_size=0.80)
# np.save("X_train.npy", train)
# np.save("X_test.npy", test)
# np.save("y_train.npy", labels_train)
# np.save("y_test.npy", labels_test)

train = np.load("data/X_train.npy")
test = np.load("data/X_test.npy")
labels_train = np.load("data/y_train.npy")
labels_test = np.load("data/y_test.npy")

nn = MLPClassifier(solver='lbfgs',
                   alpha=1e-5,
                   hidden_layer_sizes=(5, 2))
# fit on the saved training split, mirroring the other examples
nn.fit(train, labels_train)
Example #9
if __name__ == "__main__":
    tf.keras.backend.clear_session()

    # crude switch between the autoencoder and DEC pipelines
    model = "auto"

    if model == "auto":
        model_type = "COAPNET"
        latent_vector = "globalAverage"
        model = buildNetwork(model_type, latent_vector, latent_dim=64,
                             epochs=50, train=True, noisy=False)
        auto, enc, pre = model.getModel()
        getNetworkResults(model, latent_vector)

    if model == "dec":
        train_data = LoadDataset("dataset/kaggle_original_train/", 0)
        train_data, train_label, val, val_label = train_data.load_data()
        encoded_x = np.reshape(train_data, (3720, 64 * 64))

        model_type = "COAPNET"
        latent_vector = "globalAverage"
        model = buildNetwork(model_type, latent_vector, latent_dim=64,
                             epochs=5, train=True, noisy=False)
        auto, enc, pre = model.getModel()

        from DEC import DeepEmbeddedClustering
        from results import getDECNetworkResults

        dec = DeepEmbeddedClustering(auto, enc, train_data, train_label, 5)
        dec.buildModel()
        dec.trainModel()
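        # Hypothetical final step: the excerpt imports getDECNetworkResults but
        # ends before calling it; this assumes the trained dec object exposes
        # the predict() interface that Example #3 expects.
        getDECNetworkResults(dec, enc)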
Example #10
def loadData(self):
    train_data = LoadDataset(self.load_data_dir, 0.1)
    self.train_data, self.train_label, self.validation_data, self.validation_label = \
        train_data.load_data()
Example #11
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors

from explainer_tabular import LimeTabularExplainer
from load_dataset import LoadDataset

test = LoadDataset(which='hp')
X = test.data.data

feature_names = test.data.feature_names
target_names = test.data.target_names

# train, test, labels_train, labels_test = train_test_split(test.data.data, test.data.target, train_size=0.80)
# np.save("X_train_hp.npy", train)
# np.save("X_test_hp.npy", test)
# np.save("y_train_hp.npy", labels_train)
# np.save("y_test_hp.npy", labels_test)

train = np.load("data/X_train_hp.npy")
test = np.load("data/X_test_hp.npy")
labels_train = np.load("data/y_train_hp.npy")
labels_test = np.load("data/y_test_hp.npy")

rf = RandomForestClassifier(n_estimators=10, random_state=0)
rf.fit(train, labels_train)
i = np.random.randint(0, test.shape[0])