def load_labeld_data(grayscale=False):
    """
    Load the labeled STL-10 train and test splits.

    Labels are shifted down by one so they start from 0. Pixel values are
    scaled to [0, 1]; note that the resulting float arrays use more memory
    than the raw uint8 images. If ``grayscale`` is True, the images are
    converted to grayscale first.
    :return: (train_inputs, train_labels, test_inputs, test_labels)
    """
    train_inputs = stl10_input.read_all_images(train_X_path)
    train_labels = stl10_input.read_labels(train_y_path)
    test_inputs = stl10_input.read_all_images(test_X_path)
    test_labels = stl10_input.read_labels(test_y_path)

    if grayscale:
        return (grayScaler(train_inputs) / 255.0, train_labels - 1,
                grayScaler(test_inputs) / 255.0, test_labels - 1)
    else:
        return (train_inputs / 255.0, train_labels - 1,
                test_inputs / 255.0, test_labels - 1)
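# `grayScaler` is not defined in this snippet. Below is a minimal sketch of what it
# is assumed to do: convert a batch of RGB images (N, H, W, 3) to single-channel
# grayscale using standard luminance weights. The original helper may differ in
# detail (e.g. it might drop the channel axis).
import numpy as np

def grayScaler(images):
    # Weighted sum over the channel axis: (N, H, W, 3) -> (N, H, W)
    weights = np.array([0.299, 0.587, 0.114])
    gray = np.tensordot(images.astype(np.float64), weights, axes=([-1], [0]))
    # Keep a trailing channel axis so downstream code sees (N, H, W, 1)
    return gray[..., np.newaxis]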
def load_stl10_data():
    """
    Load the STL-10 binary dataset, expected to be saved in the
    ``stl10_binary`` folder. Labels are shifted to start from 0 and
    one-hot encoded.
    """
    x = read_all_images('stl10_binary/train_X.bin')
    y = read_labels('stl10_binary/train_y.bin')
    y = y - 1
    y = keras.utils.to_categorical(y, num_classes=10)

    val_x = read_all_images('stl10_binary/test_X.bin')
    val_y = read_labels('stl10_binary/test_y.bin')
    val_y = val_y - 1
    val_y = keras.utils.to_categorical(val_y, 10)

    return (x, y), (val_x, val_y)
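# A minimal usage sketch for `load_stl10_data`. The small dense model below is
# purely illustrative (it is not part of the original code); it only shows that
# the function returns Keras-ready arrays for the 96x96x3 STL-10 images.
import keras

(x, y), (val_x, val_y) = load_stl10_data()

model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=(96, 96, 3)),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(10, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x / 255.0, y, validation_data=(val_x / 255.0, val_y), epochs=5, batch_size=64)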
def __init__(self):
    # Map the "train" file names to the corresponding "test" (or "unlabeled") files
    train_re = re.compile(r"train_")
    train_to_test = lambda x: train_re.sub("test_", x)

    current_dir = os.getcwd()
    os.chdir(os.path.join("STL10"))
    stl10_input.download_and_extract()

    # Populate the training set
    self.x_train = stl10_input.read_all_images(stl10_input.DATA_PATH)
    self.y_train = stl10_input.read_labels(stl10_input.LABEL_PATH)
    self.y_train -= 1  # STL-10 labels are 1-indexed; shift to 0-indexed

    # Populate the test set
    self.x_test = stl10_input.read_all_images(train_to_test(stl10_input.DATA_PATH))
    self.y_test = stl10_input.read_labels(train_to_test(stl10_input.LABEL_PATH))
    self.y_test -= 1  # STL-10 labels are 1-indexed; shift to 0-indexed

    # self.x_unlabeled = stl10_input.read_all_images(train_to_unlabeled(stl10_input.DATA_PATH))

    os.chdir(current_dir)
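# The enclosing class is not shown in this excerpt. Assuming it is a plain dataset
# wrapper (the name `STL10Data` below is hypothetical), usage and the expected
# shapes would look like this:
data = STL10Data()                              # hypothetical name for the wrapper above
print(data.x_train.shape)                       # (5000, 96, 96, 3) for the STL-10 training split
print(data.y_train.min(), data.y_train.max())   # 0 and 9 after the -1 shift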
def prepare_data():
    # Download and organize the data
    stl.download_and_extract()
    images = stl.read_all_images(DATA_PATH)
    labels = stl.read_labels(LABEL_PATH)
    test_x = stl.read_all_images(TEST_DATA_PATH)
    test_y = stl.read_labels(TEST_LABEL_PATH)

    train_x = images[:NUM_TRAINING_SAMPLES]
    train_y = labels[:NUM_TRAINING_SAMPLES]
    val_x = images[-NUM_VAL_SAMPLES:]
    val_y = labels[-NUM_VAL_SAMPLES:]

    if not os.path.isdir(TRAIN_DIR):
        os.makedirs(TRAIN_DIR)
    if not os.path.isdir(VAL_DIR):
        os.makedirs(VAL_DIR)
    if not os.path.isdir(TEST_DIR):
        os.makedirs(TEST_DIR)

    stl.save_images(train_x, train_y, TRAIN_DIR)
    stl.save_images(val_x, val_y, VAL_DIR)
    stl.save_images(test_x, test_y, TEST_DIR)
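# The constants used by `prepare_data` are not shown in this snippet. A plausible
# set of definitions is sketched below; every path and sample count here is an
# illustrative assumption, not taken from the original module.
import os
import stl10_input as stl

DATA_PATH = './data/stl10_binary/train_X.bin'
LABEL_PATH = './data/stl10_binary/train_y.bin'
TEST_DATA_PATH = './data/stl10_binary/test_X.bin'
TEST_LABEL_PATH = './data/stl10_binary/test_y.bin'

TRAIN_DIR = './img/train'
VAL_DIR = './img/val'
TEST_DIR = './img/test'

NUM_TRAINING_SAMPLES = 4500   # out of the 5000 labeled STL-10 training images
NUM_VAL_SAMPLES = 500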
def train():
    fileName = "..\\data\\train_X.bin"
    X_train = read_all_images(fileName)

    testFile = "..\\data\\test_X.bin"
    X_test = read_all_images(testFile)

    test_y_File = "..\\data\\test_y.bin"
    targets = read_labels(test_y_File)

    # mnist = fetch_openml('mnist_784', version=1, cache=True)
    # targets = mnist.target[60000:]
    # X_train = mnist.data[:60000]
    # X_test = mnist.data[60000:]

    script_directory = os.path.split(os.path.abspath(__file__))[0]

    colons = []
    optimizers = []
    colons_paths = []

    filepath = 'encoders\\encoder_' + str(0) + '.model'
    predictor_model = os.path.join(script_directory, filepath)
    colons_paths.append(predictor_model)

    input_size = 4106
    # input_size = 1152

    # Build ten Specialist networks, each with its own Adam optimizer
    for _ in range(10):
        c = Specialist(3, input_size).cuda()
        colons.append(c)
        optimizers.append(torch.optim.Adam(c.parameters(), lr=LEARNING_RATE_DEFAULT))

    max_loss = 1999

    for iteration in range(MAX_STEPS_DEFAULT):
        ids = np.random.choice(len(X_train), size=BATCH_SIZE_DEFAULT, replace=False)
        train = True

        # Three passes; each pass re-feeds the previous predictions
        products, mim = forward_block(X_train, ids, colons, optimizers, train, BATCH_SIZE_DEFAULT)
        p1, p2, p3, p4, p5, p6, p7, p8, p9, p0 = products

        products, mim = forward_block(X_train, ids, colons, optimizers, train, BATCH_SIZE_DEFAULT,
                                      p1, p2, p3, p4, p5, p6, p7, p8, p9, p0)
        p1, p2, p3, p4, p5, p6, p7, p8, p9, p0 = products

        products, mim = forward_block(X_train, ids, colons, optimizers, train, BATCH_SIZE_DEFAULT,
                                      p1, p2, p3, p4, p5, p6, p7, p8, p9, p0)

        if iteration % EVAL_FREQ_DEFAULT == 0:
            print_dict = {1: "", 2: "", 3: "", 4: "", 5: "", 6: "", 7: "", 8: "", 9: "", 10: ""}
            test_ids = np.random.choice(len(X_test), size=BATCH_SIZE_DEFAULT, replace=False)

            products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT)
            p1, p2, p3, p4, p5, p6, p7, p8, p9, p0 = products
            print("loss 1: ", mim.item())

            products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT,
                                          p1, p2, p3, p4, p5, p6, p7, p8, p9, p0)
            p1, p2, p3, p4, p5, p6, p7, p8, p9, p0 = products
            print("loss 2: ", mim.item())

            products, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT,
                                          p1, p2, p3, p4, p5, p6, p7, p8, p9, p0)
            print("loss 3: ", mim.item())

            print()
            print("iteration: ", iteration)

            print_dict = gather_data(print_dict, products, targets, test_ids)
            print_info(print_dict)

            test_loss = mim.item()
            if max_loss > test_loss:
                max_loss = test_loss
                print("models saved iter: " + str(iteration))
                # for i in range(number_colons):
                #     torch.save(colons[i], colons_paths[i])

            print("test loss " + str(test_loss))
            print("")
def train():
    fileName = "..\\data\\train_X.bin"
    X_train = read_all_images(fileName)

    testFile = "..\\data\\test_X.bin"
    X_test = read_all_images(testFile)

    test_y_File = "..\\data\\test_y.bin"
    targets = read_labels(test_y_File)

    # mnist = fetch_openml('mnist_784', version=1, cache=True)
    # targets = mnist.target[60000:]
    # X_train = mnist.data[:60000]
    # X_test = mnist.data[60000:]

    script_directory = os.path.split(os.path.abspath(__file__))[0]

    colons = []
    optimizers = []
    colons_paths = []

    filepath = 'encoders\\encoder_' + str(0) + '.model'
    predictor_model = os.path.join(script_directory, filepath)
    colons_paths.append(predictor_model)

    # Single CapsNet model with one Adam optimizer
    c = CapsNet()
    c = c.cuda()
    colons.append(c)

    optimizer = torch.optim.Adam(c.parameters(), lr=LEARNING_RATE_DEFAULT)
    optimizers.append(optimizer)

    max_loss = 10000000

    for iteration in range(MAX_STEPS_DEFAULT):
        ids = np.random.choice(len(X_train), size=BATCH_SIZE_DEFAULT, replace=False)
        train = True

        products, mim, new_preds = forward_block(X_train, ids, colons, optimizers, train, BATCH_SIZE_DEFAULT)

        if iteration % EVAL_FREQ_DEFAULT == 0:
            print_dict = {1: "", 2: "", 3: "", 4: "", 5: "", 6: "", 7: "", 8: "", 9: "", 0: ""}
            test_ids = np.random.choice(len(X_test), size=BATCH_SIZE_DEFAULT, replace=False)

            products, mim, new_preds = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT)

            print()
            print("iteration: ", iteration)

            print_dict = gather_data(print_dict, new_preds, targets, test_ids)
            print_info(print_dict)

            test_loss = mim.item()
            if max_loss > test_loss:
                max_loss = test_loss
                print("models saved iter: " + str(iteration))
                # for i in range(number_colons):
                #     torch.save(colons[i], colons_paths[i])

            print("test loss " + str(test_loss))
            print("")
import numpy as np
import tqdm
from skimage.io import imread
from skimage.transform import resize
import matplotlib.pyplot as plt

import stl10_input as STL10

# STL-10
DATA_PATH = './train_X.bin'
LABEL_PATH = './train_y.bin'

x_inp = STL10.read_all_images(DATA_PATH)
label = STL10.read_labels(LABEL_PATH)

x_processed = np.load('./processed_train_X.npy')

for index, img in tqdm.tqdm(enumerate(x_inp)):
    test_img = resize(img, (229, 229), mode='constant')
    processed_vector = x_processed[index]
    plt.imshow(test_img)
    plt.show()
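# `processed_train_X.npy` is loaded above but its creation is not shown. A minimal
# sketch of how such a file could be produced is given below, assuming it simply
# holds the resized images; the original file may instead contain extracted feature
# vectors, so treat this only as an illustration.
import numpy as np
from skimage.transform import resize
import stl10_input as STL10

x_inp = STL10.read_all_images('./train_X.bin')

# Resize every 96x96x3 image to 229x229x3 and stack into one array
processed = np.stack([resize(img, (229, 229), mode='constant') for img in x_inp])
np.save('./processed_train_X.npy', processed)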
# Convert images to flattened grayscale
x_train = images_gray_falt_version(images=x_train)

# Convert type and scale to [0, 1]
x_train = x_train.astype('float32') / 255.
print(x_train.shape, type(x_train))

# Read images (test)
x_test = read_all_images(path_test_x)

# Convert images to flattened grayscale
x_test = images_gray_falt_version(images=x_test)

# Convert type and scale to [0, 1]
x_test = x_test.astype('float32') / 255.
print(x_test.shape, type(x_test))

# Read labels (train)
y_train = read_labels(path_train_y)
print(y_train.shape)

# Read labels (test)
y_test = read_labels(path_test_y)
print(y_test.shape)

# Train/validation split
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train,
                                                      train_size=0.80,
                                                      random_state=64)
print(x_train.shape, y_train.shape, x_valid.shape, y_valid.shape)

# Read the names of the labels
labels_name = read_names_of_labels(path=path_labels_name)
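# `images_gray_falt_version` is not defined in this excerpt. The sketch below shows
# what it is assumed to do: convert each RGB image to grayscale and flatten it to one
# vector per image. The original helper name is kept for consistency, but its exact
# implementation may differ.
import numpy as np

def images_gray_falt_version(images):
    # (N, H, W, 3) uint8 RGB -> (N, H * W) float32 grayscale vectors
    weights = np.array([0.299, 0.587, 0.114])
    gray = np.tensordot(images.astype(np.float32), weights, axes=([-1], [0]))
    return gray.reshape(gray.shape[0], -1)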
import keras
from keras.engine import Model
from keras.layers import Dense, Flatten, Dropout
from keras import optimizers
import pandas as pd
from keras.applications.vgg16 import VGG16
from keras.preprocessing import image
from keras.applications.vgg16 import preprocess_input
from keras.applications.vgg16 import decode_predictions

import stl10_input

# Download the data if required
stl10_input.download_and_extract()

x_train = stl10_input.read_all_images('data/stl10_binary/train_X.bin')
y_train = stl10_input.read_labels('data/stl10_binary/train_y.bin')
x_test = stl10_input.read_all_images('data/stl10_binary/test_X.bin')
y_test = stl10_input.read_labels('data/stl10_binary/test_y.bin')
# unlabeled = stl10_input.read_all_images('data/stl10_binary/unlabeled_X.bin')

print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)
# print(unlabeled.shape)

# Model: VGG16 convolutional base pre-trained on ImageNet, without the top classifier
model = VGG16(weights='imagenet', include_top=False, input_shape=(96, 96, 3), classes=10)
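# The imported Flatten, Dense, Dropout and Model suggest a custom classification head
# on top of the VGG16 base. Below is a minimal sketch of such a head for the 10 STL-10
# classes; the layer sizes, dropout rate, learning rate and epoch count are illustrative
# assumptions, not taken from the original script.

# Freeze the pre-trained convolutional base
for layer in model.layers:
    layer.trainable = False

# Attach a small classification head
x = Flatten()(model.output)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)
predictions = Dense(10, activation='softmax')(x)

full_model = Model(inputs=model.input, outputs=predictions)
full_model.compile(optimizer=optimizers.Adam(lr=1e-4),
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])

# STL-10 labels are 1-indexed in the binaries, so shift to 0-indexed here
full_model.fit(preprocess_input(x_train.astype('float32')), y_train - 1,
               validation_data=(preprocess_input(x_test.astype('float32')), y_test - 1),
               epochs=5, batch_size=32)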
def train():
    fileName = "data\\train_X.bin"
    X_train = read_all_images(fileName)

    testFile = "data\\test_X.bin"
    X_test = read_all_images(testFile)

    test_y_File = "data\\test_y.bin"
    targets = read_labels(test_y_File)

    # mnist = fetch_openml('mnist_784', version=1, cache=True)
    # targets = mnist.target[60000:]
    # X_train = mnist.data[:60000]
    # X_test = mnist.data[60000:]

    script_directory = os.path.split(os.path.abspath(__file__))[0]

    colons = []
    optimizers = []
    colons_paths = []

    filepath = 'encoders\\encoder_' + str(0) + '.model'
    predictor_model = os.path.join(script_directory, filepath)
    colons_paths.append(predictor_model)

    input_size = 4126
    # input_size = 1152

    # c = Ensemble()
    # c.cuda()

    # Single EncoderSTL model with one Adam optimizer
    c = EncoderSTL(3, input_size)
    c.cuda()
    colons.append(c)

    optimizer = torch.optim.Adam(c.parameters(), lr=LEARNING_RATE_DEFAULT)
    optimizers.append(optimizer)

    max_loss = 1999

    for iteration in range(MAX_STEPS_DEFAULT):
        ids = np.random.choice(len(X_train), size=BATCH_SIZE_DEFAULT, replace=False)
        train = True

        # Three passes; each pass re-feeds the previous predictions
        p1, p2, p3, p4, mim = forward_block(X_train, ids, colons, optimizers, train, BATCH_SIZE_DEFAULT)
        p1, p2, p3, p4, mim = forward_block(X_train, ids, colons, optimizers, train, BATCH_SIZE_DEFAULT,
                                            p1, p2, p3, p4)
        p1, p2, p3, p4, mim = forward_block(X_train, ids, colons, optimizers, train, BATCH_SIZE_DEFAULT,
                                            p1, p2, p3, p4)

        if iteration % EVAL_FREQ_DEFAULT == 0:
            test_ids = np.random.choice(len(X_test), size=BATCH_SIZE_DEFAULT, replace=False)

            p1, p2, p3, p4, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT)
            print("loss 1: ", mim.item())

            p1, p2, p3, p4, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT,
                                                p1, p2, p3, p4)
            print("loss 2: ", mim.item())

            p1, p2, p3, p4, mim = forward_block(X_test, test_ids, colons, optimizers, False, BATCH_SIZE_DEFAULT,
                                                p1, p2, p3, p4)
            print("loss 3: ", mim.item())

            print()
            print("iteration: ", iteration)
            print(p1[0])
            print(p2[0])
            print(p3[0])
            print(p4[0])

            print_info(p1, p2, p3, p4, targets, test_ids)

            test_loss = mim.item()
            if max_loss > test_loss:
                max_loss = test_loss
                print("models saved iter: " + str(iteration))
                # for i in range(number_colons):
                #     torch.save(colons[i], colons_paths[i])

            print("test loss " + str(test_loss))
            print("")
nb_classes = 10
nb_epoch = 200
data_augmentation = True

# Input image dimensions
img_rows, img_cols = 96, 96
# The STL-10 images are RGB.
img_channels = 3

from stl10_input import read_all_images, read_labels

X_train = read_all_images('/home/changjianlong/deep-unsupervised-learning/revise1/data/stl10_binary/train_X.bin')
X_test = read_all_images('/home/changjianlong/deep-unsupervised-learning/revise1/data/stl10_binary/test_X.bin')
y_train = read_labels('/home/changjianlong/deep-unsupervised-learning/revise1/data/stl10_binary/train_y.bin') - 1
y_test = read_labels('/home/changjianlong/deep-unsupervised-learning/revise1/data/stl10_binary/test_y.bin') - 1

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# Subtract the mean image and normalize
mean_image = np.mean(X_train, axis=0)
X_train -= mean_image
X_test -= mean_image
X_train /= 128.
X_test /= 128.

aug = 4
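# Since `data_augmentation` is True, training presumably runs the data through an
# augmentation pipeline. Below is a minimal sketch using Keras' ImageDataGenerator;
# the specific transforms, the batch size, and how `aug = 4` is used are assumptions,
# and the model itself is defined elsewhere in the original script.
from keras.preprocessing.image import ImageDataGenerator

if data_augmentation:
    # Light geometric augmentation; ranges are illustrative
    datagen = ImageDataGenerator(width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 horizontal_flip=True)
    datagen.fit(X_train)
    train_flow = datagen.flow(X_train, Y_train, batch_size=32)
    # A model defined elsewhere could then be trained with, e.g.:
    # model.fit_generator(train_flow,
    #                     steps_per_epoch=len(X_train) // 32,
    #                     epochs=nb_epoch,
    #                     validation_data=(X_test, Y_test))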