def load_labeld_data(grayscale=False):
    """
    Load the labeled STL-10 train/test splits.

    Labels are shifted so they start from 0, and pixel values are scaled
    to [0, 1] (note: the division promotes the arrays to float, which uses
    more memory). If ``grayscale`` is True, the images are converted to
    grayscale first.
    :return: (train_inputs, train_labels, test_inputs, test_labels)
    """
    train_inputs = stl10_input.read_all_images(train_X_path)
    train_labels = stl10_input.read_labels(train_y_path)
    test_inputs = stl10_input.read_all_images(test_X_path)
    test_labels = stl10_input.read_labels(test_y_path)
    if grayscale:
        return grayScaler(train_inputs)/255.0, train_labels - 1, grayScaler(test_inputs)/255.0, test_labels - 1
    else:
        return train_inputs/255.0, train_labels - 1, test_inputs/255.0, test_labels - 1
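
A minimal usage sketch (assuming `stl10_input` and the `*_path` constants referenced above are already configured):

# Sketch; assumes the *_path constants above point at the STL-10 binaries.
train_x, train_y, test_x, test_y = load_labeld_data(grayscale=True)
print(train_x.shape, train_y.min(), train_y.max())  # labels now run 0..9
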
Example #2
def load_stl10_data():
    """
    A function to load the STL-10 binary dataset.
    The dataset is saved in the ``stl10_binary`` folder.
    """
    x = read_all_images('stl10_binary/train_X.bin')
    y = read_labels('stl10_binary/train_y.bin')
    y = y - 1
    y = keras.utils.to_categorical(y, num_classes=10)

    val_x = read_all_images('stl10_binary/test_X.bin')
    val_y = read_labels('stl10_binary/test_y.bin')

    val_y = val_y - 1
    val_y = keras.utils.to_categorical(val_y, 10)
    return (x, y), (val_x, val_y)
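
For reference, STL-10 has 5,000 labeled training images and 8,000 labeled test images at 96x96x3, so a quick shape check after loading looks like this:

(x, y), (val_x, val_y) = load_stl10_data()
print(x.shape, y.shape)          # (5000, 96, 96, 3) (5000, 10)
print(val_x.shape, val_y.shape)  # (8000, 96, 96, 3) (8000, 10)
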
Example #4
def __init__(self):
    # Replace "train" with "test" or "unlabeled"
    train_re = re.compile(r"train_")
    train_to_test = lambda x: train_re.sub("test_", x)
    current_dir = os.getcwd()
    os.chdir("STL10")
    stl10_input.download_and_extract()
    # Populate the training set
    self.x_train = stl10_input.read_all_images(stl10_input.DATA_PATH)
    self.y_train = stl10_input.read_labels(stl10_input.LABEL_PATH)
    self.y_train -= 1  # Labels are not 0-indexed
    # Populate the test set by rewriting the train paths
    self.x_test = stl10_input.read_all_images(
        train_to_test(stl10_input.DATA_PATH))
    self.y_test = stl10_input.read_labels(
        train_to_test(stl10_input.LABEL_PATH))
    self.y_test -= 1  # Labels are not 0-indexed
    #self.x_unlabeled = stl10_input.read_all_images(train_to_unlabeled(stl10_input.DATA_PATH))
    os.chdir(current_dir)
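
The path rewriting above works because the train and test files differ only by prefix. A standalone illustration (the example path mirrors the defaults of the commonly used stl10_input helper; treat it as an assumption):

import re

train_re = re.compile(r"train_")
train_to_test = lambda x: train_re.sub("test_", x)
# Assumed default path from the widely used stl10_input helper.
print(train_to_test("./data/stl10_binary/train_X.bin"))
# -> ./data/stl10_binary/test_X.bin
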
Example #5
def prepare_data():
    # Download and Organize data
    stl.download_and_extract()
    images = stl.read_all_images(DATA_PATH)
    labels = stl.read_labels(LABEL_PATH)
    test_x = stl.read_all_images(TEST_DATA_PATH)
    test_y = stl.read_labels(TEST_LABEL_PATH)

    train_x = images[:NUM_TRAINING_SAMPLES]
    train_y = labels[:NUM_TRAINING_SAMPLES]
    val_x = images[-NUM_VAL_SAMPLES:]
    val_y = labels[-NUM_VAL_SAMPLES:]

    if not os.path.isdir(TRAIN_DIR):
        os.makedirs(TRAIN_DIR)
    if not os.path.isdir(VAL_DIR):
        os.makedirs(VAL_DIR)
    if not os.path.isdir(TEST_DIR):
        os.makedirs(TEST_DIR)

    stl.save_images(train_x, train_y, TRAIN_DIR)
    stl.save_images(val_x, val_y, VAL_DIR)
    stl.save_images(test_x, test_y, TEST_DIR)
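
Once the splits are written to disk, they can be streamed with Keras' directory iterator. A hedged sketch (it assumes stl.save_images lays images out one subfolder per class label, which flow_from_directory expects):

from keras.preprocessing.image import ImageDataGenerator

# Assumes TRAIN_DIR contains one subdirectory per class label.
gen = ImageDataGenerator(rescale=1. / 255)
train_flow = gen.flow_from_directory(TRAIN_DIR,
                                     target_size=(96, 96),
                                     batch_size=32,
                                     class_mode='categorical')
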
Example #6

def train():
    fileName = "..\\data\\train_X.bin"
    X_train = read_all_images(fileName)

    testFile = "..\\data\\test_X.bin"
    X_test = read_all_images(testFile)

    test_y_File = "..\\data\\test_y.bin"
    targets = read_labels(test_y_File)

    # mnist = fetch_openml('mnist_784', version=1, cache=True)
    # targets = mnist.target[60000:]
    #
    # X_train = mnist.data[:60000]
    # X_test = mnist.data[60000:]

    script_directory = os.path.split(os.path.abspath(__file__))[0]

    colons = []
    optimizers = []
    colons_paths = []

    filepath = 'encoders\\encoder_' + str(0) + '.model'
    predictor_model = os.path.join(script_directory, filepath)
    colons_paths.append(predictor_model)

    input_dim = 4106
    # input_dim = 1152

    # Ten Specialist networks, one per class, each with its own Adam optimizer.
    for _ in range(10):
        c = Specialist(3, input_dim).cuda()
        colons.append(c)
        optimizers.append(
            torch.optim.Adam(c.parameters(), lr=LEARNING_RATE_DEFAULT))

    max_loss = 1999  # lowest test loss seen so far

    for iteration in range(MAX_STEPS_DEFAULT):

        ids = np.random.choice(len(X_train), size=BATCH_SIZE_DEFAULT, replace=False)

        is_train = True
        products, mim = forward_block(X_train, ids, colons, optimizers, is_train, BATCH_SIZE_DEFAULT)
        p1, p2, p3, p4, p5, p6, p7, p8, p9, p0 = products
        products, mim = forward_block(X_train, ids, colons, optimizers, is_train, BATCH_SIZE_DEFAULT, p1, p2, p3, p4, p5, p6, p7, p8, p9, p0)
        p1, p2, p3, p4, p5, p6, p7, p8, p9, p0 = products
        products, mim = forward_block(X_train, ids, colons, optimizers, is_train, BATCH_SIZE_DEFAULT, p1, p2, p3, p4, p5, p6, p7, p8, p9, p0)

        if iteration % EVAL_FREQ_DEFAULT == 0:
            # print_dict = {"0": "", "1": "", "2": "", "3": "", "4": "", "5": "", "6": "", "7": "", "8": "", "9": ""}
            print_dict = {1: "", 2: "", 3: "", 4: "", 5: "", 6: "", 7: "", 8: "", 9: "", 10: ""}

            test_ids = np.random.choice(len(X_test), size=BATCH_SIZE_DEFAULT, replace=False)
            products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT)


            # test_ids = np.random.choice(len(X_test), size=BATCH_SIZE_DEFAULT, replace=False)
            # products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT)
            # print_dict = gather_data(print_dict, products, targets, test_ids)
            #
            # test_ids = np.random.choice(len(X_test), size=BATCH_SIZE_DEFAULT, replace=False)
            # products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT)
            # print_dict = gather_data(print_dict, products, targets, test_ids)

            p1, p2, p3, p4, p5, p6, p7, p8, p9, p0 = products
            print("loss 1: ", mim.item())
            products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT, p1, p2, p3, p4, p5, p6, p7, p8, p9, p0)
            p1, p2, p3, p4, p5, p6, p7, p8, p9, p0 = products
            print("loss 2: ", mim.item())
            products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT, p1, p2, p3, p4, p5, p6, p7, p8, p9, p0)
            print("loss 3: ", mim.item())

            print()
            print("iteration: ", iteration)

            print_dict = gather_data(print_dict, products, targets, test_ids)
            print_info(print_dict)

            test_loss = mim.item()

            if max_loss > test_loss:
                max_loss = test_loss
                print("models saved iter: " + str(iteration))
                # for i in range(number_colons):
                #     torch.save(colons[i], colons_paths[i])

            print("test loss " + str(test_loss))
            print("")
Example #7

def train():
    fileName = "..\\data\\train_X.bin"
    X_train = read_all_images(fileName)

    testFile = "..\\data\\test_X.bin"
    X_test = read_all_images(testFile)

    test_y_File = "..\\data\\test_y.bin"
    targets = read_labels(test_y_File)

    # mnist = fetch_openml('mnist_784', version=1, cache=True)
    # targets = mnist.target[60000:]
    #
    # X_train = mnist.data[:60000]
    # X_test = mnist.data[60000:]

    script_directory = os.path.split(os.path.abspath(__file__))[0]

    colons = []

    optimizers = []
    colons_paths = []

    filepath = 'encoders\\encoder_' + str(0) + '.model'
    predictor_model = os.path.join(script_directory, filepath)
    colons_paths.append(predictor_model)

    c = CapsNet()
    c = c.cuda()
    colons.append(c)

    optimizer = torch.optim.Adam(c.parameters(), lr=LEARNING_RATE_DEFAULT)
    optimizers.append(optimizer)

    max_loss = 10000000  # lowest test loss seen so far

    for iteration in range(MAX_STEPS_DEFAULT):

        ids = np.random.choice(len(X_train),
                               size=BATCH_SIZE_DEFAULT,
                               replace=False)

        is_train = True
        products, mim, new_preds = forward_block(X_train, ids, colons,
                                                 optimizers, is_train,
                                                 BATCH_SIZE_DEFAULT)

        if iteration % EVAL_FREQ_DEFAULT == 0:
            # print_dict = {"0": "", "1": "", "2": "", "3": "", "4": "", "5": "", "6": "", "7": "", "8": "", "9": ""}
            print_dict = {
                1: "",
                2: "",
                3: "",
                4: "",
                5: "",
                6: "",
                7: "",
                8: "",
                9: "",
                0: ""
            }

            test_ids = np.random.choice(len(X_test),
                                        size=BATCH_SIZE_DEFAULT,
                                        replace=False)
            products, mim, new_preds = forward_block(X_test, test_ids, colons,
                                                     optimizers, False,
                                                     BATCH_SIZE_DEFAULT)

            # test_ids = np.random.choice(len(X_test), size=BATCH_SIZE_DEFAULT, replace=False)
            # products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT)
            # print_dict = gather_data(print_dict, products, targets, test_ids)
            #
            # test_ids = np.random.choice(len(X_test), size=BATCH_SIZE_DEFAULT, replace=False)
            # products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT)
            # print_dict = gather_data(print_dict, products, targets, test_ids)

            # print("loss 1: ", mim.item())
            # products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT)

            print()
            print("iteration: ", iteration)

            print_dict = gather_data(print_dict, new_preds, targets, test_ids)
            print_info(print_dict)

            test_loss = mim.item()

            if max_loss > test_loss:
                max_loss = test_loss
                print("models saved iter: " + str(iteration))
                # for i in range(number_colons):
                #     torch.save(colons[i], colons_paths[i])

            print("test loss " + str(test_loss))
            print("")
Example #8
import numpy as np
import tqdm
from skimage.io import imread
from skimage.transform import resize
import matplotlib.pyplot as plt

import stl10_input as STL10

#STL-10
DATA_PATH = './train_X.bin'
LABEL_PATH = './train_y.bin'

x_inp = STL10.read_all_images(DATA_PATH)
label = STL10.read_labels(LABEL_PATH)

x_processed = np.load('./processed_train_X.npy')

for index, img in tqdm.tqdm(enumerate(x_inp)):
    test_img = resize(img, (229, 229), mode='constant')
    processed_vector = x_processed[index]  # corresponding row of the preprocessed array (unused below)
    plt.imshow(test_img)
    plt.show()
Example #9

# Read images (train images)
x_train = read_all_images(path_train_x)
# convert images to gray flatten
x_train = images_gray_falt_version(images=x_train)
# convert type
x_train = x_train.astype('float32') / 255.
print(x_train.shape, type(x_train))

# Read images (test images)
x_test = read_all_images(path_test_x)
# convert images to gray flatten
x_test = images_gray_falt_version(images=x_test)
# convert type
x_test = x_test.astype('float32') / 255.
print(x_test.shape, type(x_test))

# Read labels (train)
y_train = read_labels(path_train_y)
print(y_train.shape)

# Read labels (test)
y_test = read_labels(path_test_y)
print(y_test.shape)

# train validation
x_train, x_valid, y_train, y_valid = train_test_split(x_train,
                                                      y_train,
                                                      train_size=0.80,
                                                      random_state=64)
print(x_train.shape, y_train.shape, x_valid.shape, y_valid.shape)

# Read name of labels
labels_name = read_names_of_labels(path=path_labels_name)
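
images_gray_falt_version is not defined in this snippet; a minimal sketch of such a helper (hypothetical implementation, assuming BT.601 luminance weights and row-major flattening):

import numpy as np

def images_gray_falt_version(images):
    # Hypothetical helper: RGB batch (N, 96, 96, 3) -> flat grayscale (N, 9216).
    weights = np.array([0.299, 0.587, 0.114])  # ITU-R BT.601 luminance
    gray = images @ weights                    # contracts the channel axis
    return gray.reshape(len(gray), -1)
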
Example #10
import keras
from keras.models import Model
from keras.layers import Dense, Flatten, Dropout
from keras import optimizers
import pandas as pd
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import decode_predictions
import stl10_input

# Download the data required
stl10_input.download_and_extract()

x_train = stl10_input.read_all_images('data/stl10_binary/train_X.bin')
y_train = stl10_input.read_labels('data/stl10_binary/train_y.bin')
x_test = stl10_input.read_all_images('data/stl10_binary/test_X.bin')
y_test = stl10_input.read_labels('data/stl10_binary/test_y.bin')
# unlabeled = stl10_input.read_all_images('data/stl10_binary/unlabeled_X.bin')
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
# print(unlabeled.shape)

# Model
model = VGG16(weights='imagenet',
              include_top=False,
              input_shape=(96, 96, 3),
              classes=10)
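
With include_top=False the classes argument is ignored, so a classification head still has to be attached. The unused imports above (Dense, Flatten, Dropout, Model, optimizers) suggest one; a plausible sketch, not necessarily the original author's head:

# Freeze the ImageNet features and train only the new head.
for layer in model.layers:
    layer.trainable = False

x = Flatten()(model.output)
x = Dense(256, activation='relu')(x)  # head width is an assumption
x = Dropout(0.5)(x)
out = Dense(10, activation='softmax')(x)

clf = Model(inputs=model.input, outputs=out)
clf.compile(optimizer=optimizers.Adam(lr=1e-4),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'])
# Note: read_labels returns labels 1..10, so shift them with y_train - 1
# before fitting with sparse_categorical_crossentropy.
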
Example #11

def train():
    fileName = "data\\train_X.bin"
    X_train = read_all_images(fileName)

    testFile = "data\\test_X.bin"
    X_test = read_all_images(testFile)

    test_y_File = "data\\test_y.bin"
    targets = read_labels(test_y_File)

    # mnist = fetch_openml('mnist_784', version=1, cache=True)
    # targets = mnist.target[60000:]
    #
    # X_train = mnist.data[:60000]
    # X_test = mnist.data[60000:]

    script_directory = os.path.split(os.path.abspath(__file__))[0]

    colons = []

    optimizers = []
    colons_paths = []

    filepath = 'encoders\\encoder_' + str(0) + '.model'
    predictor_model = os.path.join(script_directory, filepath)
    colons_paths.append(predictor_model)

    input_dim = 4126
    # input_dim = 1152

    # c = Ensemble()
    # c.cuda()

    c = EncoderSTL(3, input_dim)
    c.cuda()
    colons.append(c)

    # c2 = EncoderSTL(3, input)
    # c2.cuda()
    # colons.append(c2)
    #
    # c3 = EncoderSTL(3, input)
    # c3.cuda()
    # colons.append(c3)
    #
    # c4 = EncoderSTL(3, input)
    # c4.cuda()
    # colons.append(c4)

    optimizer = torch.optim.Adam(c.parameters(), lr=LEARNING_RATE_DEFAULT)
    optimizers.append(optimizer)

    # optimizer2 = torch.optim.Adam(c2.parameters(), lr=LEARNING_RATE_DEFAULT)
    # optimizers.append(optimizer2)
    #
    # optimizer3 = torch.optim.Adam(c3.parameters(), lr=LEARNING_RATE_DEFAULT)
    # optimizers.append(optimizer3)
    #
    # optimizer4 = torch.optim.Adam(c4.parameters(), lr=LEARNING_RATE_DEFAULT)
    # optimizers.append(optimizer4)

    max_loss = 1999  # lowest test loss seen so far

    for iteration in range(MAX_STEPS_DEFAULT):

        ids = np.random.choice(len(X_train),
                               size=BATCH_SIZE_DEFAULT,
                               replace=False)

        is_train = True
        p1, p2, p3, p4, mim = forward_block(X_train, ids, colons, optimizers,
                                            is_train, BATCH_SIZE_DEFAULT)
        p1, p2, p3, p4, mim = forward_block(X_train, ids, colons, optimizers,
                                            is_train, BATCH_SIZE_DEFAULT, p1, p2,
                                            p3, p4)
        p1, p2, p3, p4, mim = forward_block(X_train, ids, colons, optimizers,
                                            is_train, BATCH_SIZE_DEFAULT, p1, p2,
                                            p3, p4)

        if iteration % EVAL_FREQ_DEFAULT == 0:

            test_ids = np.random.choice(len(X_test),
                                        size=BATCH_SIZE_DEFAULT,
                                        replace=False)

            p1, p2, p3, p4, mim = forward_block(X_test, test_ids, colons,
                                                optimizers, False,
                                                BATCH_SIZE_DEFAULT)
            print("loss 1: ", mim.item())
            p1, p2, p3, p4, mim = forward_block(X_test, test_ids, colons,
                                                optimizers, False,
                                                BATCH_SIZE_DEFAULT, p1, p2, p3,
                                                p4)
            print("loss 2: ", mim.item())
            p1, p2, p3, p4, mim = forward_block(X_test, test_ids, colons,
                                                optimizers, False,
                                                BATCH_SIZE_DEFAULT, p1, p2, p3,
                                                p4)
            print("loss 3: ", mim.item())

            print()
            print("iteration: ", iteration)

            print(p1[0])
            print(p2[0])
            print(p3[0])
            print(p4[0])

            print_info(p1, p2, p3, p4, targets, test_ids)

            test_loss = mim.item()

            if max_loss > test_loss:
                max_loss = test_loss
                print("models saved iter: " + str(iteration))
                # for i in range(number_colons):
                #     torch.save(colons[i], colons_paths[i])

            print("test loss " + str(test_loss))
            print("")
Example #12
import numpy as np
from keras.utils import np_utils

from stl10_input import read_all_images, read_labels

nb_classes = 10
nb_epoch = 200
data_augmentation = True

# input image dimensions
img_rows, img_cols = 96, 96
# The STL-10 images are RGB.
img_channels = 3

X_train = read_all_images('/home/changjianlong/deep-unsupervised-learning/revise1/data/stl10_binary/train_X.bin')
X_test = read_all_images('/home/changjianlong/deep-unsupervised-learning/revise1/data/stl10_binary/test_X.bin')
y_train = read_labels('/home/changjianlong/deep-unsupervised-learning/revise1/data/stl10_binary/train_y.bin') - 1
y_test = read_labels('/home/changjianlong/deep-unsupervised-learning/revise1/data/stl10_binary/test_y.bin') - 1
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)


# subtract mean and normalize
mean_image = np.mean(X_train, axis=0)
X_train -= mean_image
X_test -= mean_image
X_train /= 128.
X_test /= 128.
aug = 4
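
The training loop is not shown; one plausible continuation when data_augmentation is True, using Keras' ImageDataGenerator (the augmentation settings are assumptions, `model` stands for whatever network the original script builds, and the unused aug variable is left as-is):

from keras.preprocessing.image import ImageDataGenerator

# Sketch only: augmentation settings are assumptions, and `model` is
# whatever network the original script defines.
datagen = ImageDataGenerator(width_shift_range=0.125,
                             height_shift_range=0.125,
                             horizontal_flip=True)
model.fit_generator(datagen.flow(X_train, Y_train, batch_size=32),
                    steps_per_epoch=len(X_train) // 32,
                    epochs=nb_epoch,
                    validation_data=(X_test, Y_test))
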