def load_ciarp(dataset_name):
    """
    Load ciarp dataset.

    Returns (x, y): as dataset x and y.
    """
    dataset_path = '/develop/data/{}/data'.format(dataset_name)
    if not os.path.exists(dataset_path):
        os.makedirs(dataset_path)
    data = hd.load(dataset_name, dataset_path)

    # TODO: define best way to do this
    x_train, y_train = data['train_Kinect_WithoutGabor']
    x_test, y_test = data['test_Kinect_WithoutGabor']
    x = np.concatenate((x_train, x_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)

    return x, y
def load_lsa16(dataset_name):
    """
    Load lsa16 dataset.

    Returns (x, y): as dataset x and y.
    """
    dataset_path = '/develop/data/{}/data'.format(dataset_name)
    if not os.path.exists(dataset_path):
        os.makedirs(dataset_path)
    data = hd.load(dataset_name, dataset_path)

    # TODO: define best way to do this
    x_train, y_train = data['train_Kinect_WithoutGabor']
    x_test, y_test = data['test_Kinect_WithoutGabor']
    x = np.concatenate((x_train, x_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)

    return x, y
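# Minimal usage sketch for the two loaders above. _example_usage is only an
# illustrative helper added here; it assumes os, numpy as np and
# handshape_datasets as hd are imported at module level, as the loaders require.
def _example_usage():
    x, y = load_ciarp('ciarp')
    print('ciarp:', x.shape, 'classes:', len(np.unique(y)))
    x, y = load_lsa16('lsa16')
    print('lsa16:', x.shape, 'classes:', len(np.unique(y)))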
def load_rwth(data_dir, config, splits):
    """
    Load rwth dataset.

    Args:
        data_dir (str): path of the directory with 'splits', 'data' subdirs.
        config (dict): general dict with program settings.
        splits (list): list of strings 'train'|'val'|'test'

    Returns (dict): dictionary with keys as splits and values as DataLoader
        instances.
    """
    DATASET_NAME = "rwth"
    DATASET_PATH = "/develop/data/rwth/data"
    data = hd.load(DATASET_NAME, DATASET_PATH)

    features = data[0]
    classes = data[1]['y']

    # Keep only the classes with at least `good_min` samples and re-index
    # their labels to a contiguous 0..K-1 range.
    good_min = 20
    good_classes = []
    for i in np.unique(classes):
        images = features[np.equal(i, classes)]
        if len(images) >= good_min:
            good_classes.append(i)
    good_x = features[np.in1d(classes, good_classes)]
    good_y = classes[np.in1d(classes, good_classes)]
    my_dict = dict(zip(np.unique(good_y), range(len(np.unique(good_y)))))
    good_y = np.vectorize(my_dict.get)(good_y)
    features, classes = good_x, good_y

    uniqueClasses = np.unique(classes)

    x_train, x_test, y_train, y_test = train_test_split_balanced(
        features, classes,
        train_size=config['data.train_size'],
        test_size=config['data.test_size'])
    x_train, x_test = x_train / 255.0, x_test / 255.0

    _, amountPerTrain = np.unique(y_train, return_counts=True)
    _, amountPerTest = np.unique(y_test, return_counts=True)

    train_datagen_args = dict(featurewise_center=True,
                              featurewise_std_normalization=True,
                              rotation_range=config['data.rotation_range'],
                              width_shift_range=config['data.width_shift_range'],
                              height_shift_range=config['data.height_shift_range'],
                              horizontal_flip=config['data.horizontal_flip'],
                              fill_mode='constant',
                              cval=0)
    train_datagen = ImageDataGenerator(**train_datagen_args)
    train_datagen.fit(x_train)

    test_datagen_args = dict(featurewise_center=True,
                             featurewise_std_normalization=True,
                             fill_mode='constant',
                             cval=0)
    test_datagen = ImageDataGenerator(**test_datagen_args)
    test_datagen.fit(x_train)

    w, h, c = list(map(int, config['model.x_dim'].split(',')))

    ret = {}
    for split in splits:
        # n_way (number of classes per episode)
        if split in ['val', 'test']:
            n_way = config['data.test_way']
        else:
            n_way = config['data.train_way']
        # n_support (number of support examples per class)
        if split in ['val', 'test']:
            n_support = config['data.test_support']
        else:
            n_support = config['data.train_support']
        # n_query (number of query examples per class)
        if split in ['val', 'test']:
            n_query = config['data.test_query']
        else:
            n_query = config['data.train_query']

        # val/test use the un-augmented generator, train uses the augmented one.
        if split in ['val', 'test']:
            y = y_test
            x = x_test
            dg = test_datagen
        else:
            y = y_train
            x = x_train
            dg = train_datagen
        amountPerClass = amountPerTest if split in ['val', 'test'] else amountPerTrain

        # Sort by class so the reshape below groups samples per class, then
        # apply the generator's random transform and featurewise normalization.
        i = np.argsort(y)
        x = x[i, :, :, :]
        for index in i:
            x[index, :, :, :] = dg.standardize(dg.random_transform(x[index]))

        # hard-coded rwth image size
        data = np.reshape(x, (len(uniqueClasses), amountPerClass[0], 132, 92, 3))

        data_loader = DataLoader(data,
                                 n_classes=len(uniqueClasses),
                                 n_way=n_way,
                                 n_support=n_support,
                                 n_query=n_query,
                                 x_dim=(w, h, c))
        ret[split] = data_loader

    return ret
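# Sketch of the config dict the episodic loader above expects, built only from
# the keys the function actually reads; the values are illustrative assumptions,
# not taken from the source.
example_config = {
    'data.train_size': 0.75,
    'data.test_size': 0.25,
    'data.rotation_range': 10,
    'data.width_shift_range': 0.1,
    'data.height_shift_range': 0.1,
    'data.horizontal_flip': False,
    'data.train_way': 5, 'data.test_way': 5,
    'data.train_support': 5, 'data.test_support': 5,
    'data.train_query': 5, 'data.test_query': 5,
    'model.x_dim': '132,92,3',
}
# loaders = load_rwth('/develop/data/rwth', example_config, ['train', 'val', 'test'])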
def train(self, dataset_id, epochs, batch_size=128, save_interval=50):
    # Load the dataset
    #(X_train, _), (_, _) = mnist.load_data()
    x, metadata = hd.load(dataset_id)
    X_train, X_test, Y_train, Y_test = self.split(
        parameters.get_split_value(dataset_id), x, metadata['y'])

    # Rescale -1 to 1
    X_train = (X_train.astype(np.float32) - 127.5) / 127.5
    #X_train = np.expand_dims(X_train, axis=3)
    #half_batch = int(batch_size / 2)

    # Adversarial ground truths
    valid = np.ones((batch_size, 1))
    fake = np.zeros((batch_size, 1))

    for epoch in range(epochs):

        # ---------------------
        #  Train Discriminator
        # ---------------------

        # Select a random batch of images
        idx = np.random.randint(0, X_train.shape[0], batch_size)
        imgs, labels = X_train[idx], Y_train[idx]

        noise = np.random.normal(0, 1, (batch_size, self.noise_value))

        # Generate a batch of new images
        gen_imgs = self.generator.predict([noise, labels])

        # Train the discriminator
        d_loss_real = self.discriminator.train_on_batch([imgs, labels], valid)
        d_loss_fake = self.discriminator.train_on_batch([gen_imgs, labels], fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # ---------------------
        #  Train Generator
        # ---------------------

        sampled_labels = np.random.randint(0, self.classes, batch_size).reshape(-1, 1)

        # The generator wants the discriminator to label the generated samples
        # as valid (ones)
        #valid_y = np.array([1] * batch_size)

        # Train the generator
        g_loss = self.combined.train_on_batch([noise, sampled_labels], valid)

        # Plot the progress
        print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" %
              (epoch, d_loss[0], 100 * d_loss[1], g_loss))

        # If at save interval => save generated image samples
        if epoch % save_interval == 0:
            self.save_imgs(epoch)

    save_path = os.path.join(default_folder, self.name)
    os.makedirs(save_path, exist_ok=True)
    self.base_model.save(
        os.path.join(save_path, f"{self.name}_GANdiscriminator{epochs}.h5"))
    self.base_model.save_weights(
        os.path.join(save_path, f"{self.name}_GANdiscriminator{epochs}_weights.h5"))
    print("Saved model to disk")
        asl_class += 1
    if (index == 62):
        asl_class += 2
    if (index == 86):
        asl_class += 1
    if (index == -1):
        worksheet.write(0, col, f"{index}")
        index += 1
    else:
        # header cell: column index plus its ASL class
        worksheet.write(0, col, f"{index}({asl_class})")
        worksheet.set_column(col, col, 32)
        index += 1
        asl_class += 1

for i, dataset_id in enumerate(hd.ids()):
    x, metadata = hd.load(dataset_id)
    worksheet.write(i + 1, 0, dataset_id)
    worksheet.set_row(i + 1, 32)
    # one flag per class
    flag = np.zeros(metadata['y'].max() + 1)
    if (x.shape[3] == 1):
        # replicate single-channel images to 3 channels
        x = np.repeat(x, 3, -1)
    for h in range(len(x)):
        clas = metadata['y'][h]
        if (flag[clas] == 0):
            path_to_save = cache_path / f"{dataset_id}image{h}.png"
            if (dataset_id == "PugeaultASL_B"):
                # build a float buffer for the depth image
                img_depth = np.zeros((x[h].shape[0], x[h].shape[1]), dtype='f8')
                max_z = x[h].max()
def __init__(self, dataset_id, **kwargs):
    # Forward the optional 'version' and 'delete' arguments to hd.load only
    # when they were actually given.
    load_kwargs = {}
    if 'version' in kwargs:
        load_kwargs['version'] = kwargs['version']
    if 'delete' in kwargs:
        load_kwargs['delete'] = kwargs['delete']
    self.dataset = hd.load(dataset_id, **load_kwargs)

    self.input_shape = self.dataset[0][0].shape
    self.img_rows = (self.input_shape[0] // 4) * 4
    self.img_cols = (self.input_shape[1] // 4) * 4
    self.channels = 3
    self.name = dataset_id
    if self.name == "psl" or self.name == "indianB":
        self.img_shape = (128, 128, self.channels)
        self.img_rows = 128
        self.img_cols = 128
    elif self.name == "indianA":
        self.img_shape = (64, 64, self.channels)
        self.img_rows = 64
        self.img_cols = 64
    else:
        self.img_shape = (self.img_rows, self.img_cols, self.channels)
    self.classes = self.dataset[1]['y'].max() + 1
    self.noise_value = 100

    optimizer = Adam(0.0002, 0.5)

    # Build and compile the discriminator
    self.base_model, self.discriminator = self.build_discriminator()
    self.discriminator.compile(loss='binary_crossentropy',
                               optimizer=optimizer,
                               metrics=['accuracy'])

    # Build and compile the generator
    self.generator = self.build_generator()
    noise = keras.layers.Input(shape=(self.noise_value, ))
    label = keras.layers.Input(shape=(1, ))
    img = self.generator([noise, label])
    #self.generator.compile(loss='binary_crossentropy', optimizer=optimizer)

    # The generator takes noise as input and generates imgs
    #z = keras.layers.Input(shape=(100,))
    #img = self.generator(z)

    # For the combined model we will only train the generator
    self.discriminator.trainable = False

    # The discriminator takes the generated images as input and determines validity
    valid = self.discriminator([img, label])

    # The combined model (stacked generator and discriminator) takes
    # noise as input => generates images => determines validity
    self.combined = Model([noise, label], valid)
    self.combined.compile(loss='binary_crossentropy', optimizer=optimizer)

    self.path = default_folder
    if not os.path.exists(self.path):
        os.makedirs(self.path)
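# Sketch of how this class might be driven end to end. The class name CGAN is
# an assumption (only its methods are shown above); the dataset id mirrors the
# lsa16 example elsewhere in this repo, and epochs/batch_size are illustrative.
if __name__ == '__main__':
    gan = CGAN('lsa16', version='color', delete=False)
    gan.train('lsa16', epochs=2000, batch_size=64, save_interval=200)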
def load_rwth(config, path=None):
    """
    Load rwth dataset.

    Returns ((x_train, y_train), (x_val, y_val), (x_test, y_test)).
    """
    train_size = config['data.train_size']
    test_size = config['data.test_size']
    n_train_per_class = config['data.n_train_per_class']
    n_test_per_class = config['data.n_test_per_class']

    if path is None:
        path = '/tf/data/{}'.format(config['data.dataset'])

    data_dir = os.path.join(path, 'data')
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    data = hd.load(config['data.dataset'], Path(data_dir))

    if config['data.split']:
        # Use the precomputed split files under <path>/splits/<split_name>/.
        split_dir = os.path.join(path, 'splits', config['data.split'])

        def split_file(split):
            return os.path.join(split_dir, f"{split}.txt")

        x_train, y_train = load_from_split(config['data.dataset'],
                                           config['data.version'],
                                           data_dir, split_file('train'))
        x_test, y_test = load_from_split(config['data.dataset'],
                                         config['data.version'],
                                         data_dir, split_file('test'))
        x_val, y_val = load_from_split(config['data.dataset'],
                                       config['data.version'],
                                       data_dir, split_file('val'))
    else:
        # Keep only the classes with at least `good_min` samples and re-index
        # their labels to a contiguous range.
        good_min = 40
        good_classes = []
        n_unique = len(np.unique(data[1]['y']))
        for i in range(n_unique):
            images = data[0][np.equal(i, data[1]['y'])]
            if len(images) >= good_min:
                good_classes.append(i)
        x = data[0][np.in1d(data[1]['y'], good_classes)]
        y = data[1]['y'][np.in1d(data[1]['y'], good_classes)]
        y_dict = dict(zip(np.unique(y), range(len(np.unique(y)))))
        y = np.vectorize(y_dict.get)(y)

        split = train_test_split if n_train_per_class <= 0 else train_test_split_balanced

        if n_train_per_class <= 0:
            # Plain stratified split, then carve 20% of train out as validation.
            x_train, x_test, y_train, y_test = split(x, y,
                                                     train_size=train_size,
                                                     test_size=test_size,
                                                     stratify=y)
            x_train, x_val, y_train, y_val = split(x_train, y_train,
                                                   train_size=0.8,
                                                   test_size=0.2,
                                                   stratify=y_train)
        else:
            # Enlarge the per-class budget before carving out the validation split.
            n_train_per_class = int(np.round(n_train_per_class * 1.6))
            x_train, x_test, y_train, y_test = split(
                np.array(x), np.array(y),
                train_size=train_size,
                test_size=test_size,
                n_train_per_class=n_train_per_class,
                n_test_per_class=n_test_per_class)
            x_train, x_val, y_train, y_val = split(
                x_train, y_train,
                train_size=0.8,
                n_train_per_class=n_train_per_class,
                test_size=0.2)

    return (x_train, y_train), (x_val, y_val), (x_test, y_test)
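# Sketch of a config for the load_rwth variant above, using only the keys the
# function reads; the values are illustrative assumptions (an empty
# 'data.split' selects the in-code split branch).
rwth_config = {
    'data.dataset': 'rwth',
    'data.version': None,
    'data.split': '',             # '' -> fall back to the in-code split
    'data.train_size': 0.75,
    'data.test_size': 0.25,
    'data.n_train_per_class': 0,  # <= 0 -> plain stratified split
    'data.n_test_per_class': 0,
}
# (train, val, test) = load_rwth(rwth_config)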
import handshape_datasets
import sklearn.model_selection
import keras
from keras.models import Model

dataset_id = "lsa16"  # example for the lsa16 dataset
ver = "color"  # version is optional; some datasets have more than one version
supr = False  # delete is optional; some datasets can remove their temporary files once the .npz file exists
epochs = 15
batch_size = 64

dataset = handshape_datasets.load(dataset_id, version=ver, delete=supr)  # load the dataset
input_shape = dataset[0][0].shape  # obtain the shape
classes = dataset[1]['y'].max() + 1  # obtain the number of classes

"""build the model"""
base_model = keras.applications.mobilenet.MobileNet(
    input_shape=(input_shape[0], input_shape[1], 3),
    weights='imagenet',
    include_top=False)
output = keras.layers.GlobalAveragePooling2D()(base_model.output)
output = keras.layers.Dense(32, activation='relu')(output)
output = keras.layers.Dense(classes, activation='softmax')(output)
model = Model(inputs=base_model.input, outputs=output)
model.compile(optimizer='Adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

"""split the dataset (it's optional)"""
test_size = 0.1
# split x (dataset[0]) and y (dataset[1]['y']) into train/test
X_train, X_test, Y_train, Y_test = sklearn.model_selection.train_test_split(
    dataset[0], dataset[1]['y'], test_size=test_size)
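"""train and evaluate (a possible continuation of the script above; the
fit/evaluate calls below are a sketch reusing the epochs/batch_size defined
earlier, not code taken from the source)"""
model.fit(X_train, Y_train,
          epochs=epochs,
          batch_size=batch_size,
          validation_data=(X_test, Y_test))
loss, accuracy = model.evaluate(X_test, Y_test, batch_size=batch_size)
print(f"test accuracy: {accuracy:.3f}")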
import handshape_datasets as hd
import logging
import numpy as np

#hd.list_datasets()

x, metadata = hd.load("rwth")
print(x.shape)
for k in metadata:
    print(k, metadata[k].shape, metadata[k].min(), metadata[k].max())

#hd.clear("Nus1")

logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.DEBUG)
#logging.debug(f"This message should go to the log file")
#logging.info("So should this")
#logging.warning("And this, too")
# Basic example showing how to get a dataset
import handshape_datasets as hd

DATASET_NAME = "PugeaultASL_A"
#version=dict({'1':'WithGabor'})

#ciarp_info = hd.info(DATASET_NAME)
#x, metadata = hd.load(DATASET_NAME, version='WithGabor')  # ciarp
#x, metadata = hd.load(DATASET_NAME, version='bw')  # nus1
#x, metadata = hd.load(DATASET_NAME, version='hn')
x, metadata = hd.load(DATASET_NAME)

print(x.shape)
print(x[1])
print(x.max())
print(x.min())
for k in metadata:
    print(k, metadata[k].shape, metadata[k].min(), metadata[k].max())

#print(ciarp_info.summary())
#ciarp[0].show_dataset()
# load never returns a dataset object; in the case of lsa16 it returns a np.array and a dict (x and metadata)
# ciarp.show_dataset(subsets=["test_Kinect_WithGabor"], samples=128)
# ciarp.show_dataset(subsets=["test_Kinect_WithGabor"], samples=[1,2,3,0,15,1,200])
def load_ciarp(data_dir, config, splits):
    """
    Load ciarp dataset.

    Args:
        data_dir (str): path of the directory with 'splits', 'data' subdirs.
        config (dict): general dict with program settings.
        splits (list): list of strings 'train'|'val'|'test'

    Returns (dict): dictionary with keys as splits and values as DataLoader
        instances.
    """
    DATASET_NAME = "ciarp"
    DATASET_PATH = "/develop/data/ciarp/data"
    data = hd.load(DATASET_NAME, DATASET_PATH)

    x_train, y_train = data['train_Kinect_WithoutGabor']
    x_test, y_test = data['test_Kinect_WithoutGabor']
    X = np.concatenate((x_train, x_test), axis=0)
    y = np.concatenate((y_train, y_test), axis=0)

    uniqueClasses = np.unique(y)

    x_train, x_test, y_train, y_test = train_test_split_balanced(
        X, y,
        train_size=config['data.train_size'],
        test_size=config['data.test_size'])
    x_train, x_test = x_train / 255.0, x_test / 255.0

    _, amountPerTrain = np.unique(y_train, return_counts=True)
    _, amountPerTest = np.unique(y_test, return_counts=True)

    train_datagen_args = dict(featurewise_center=True,
                              featurewise_std_normalization=True,
                              rotation_range=config['data.rotation_range'],
                              width_shift_range=config['data.width_shift_range'],
                              height_shift_range=config['data.height_shift_range'],
                              horizontal_flip=config['data.horizontal_flip'],
                              fill_mode='constant',
                              cval=0)
    train_datagen = ImageDataGenerator(**train_datagen_args)
    train_datagen.fit(x_train)

    test_datagen_args = dict(featurewise_center=True,
                             featurewise_std_normalization=True,
                             fill_mode='constant',
                             cval=0)
    test_datagen = ImageDataGenerator(**test_datagen_args)
    test_datagen.fit(x_train)

    w, h, c = list(map(int, config['model.x_dim'].split(',')))

    ret = {}
    for split in splits:
        # n_way (number of classes per episode)
        if split in ['val', 'test']:
            n_way = config['data.test_way']
        else:
            n_way = config['data.train_way']
        # n_support (number of support examples per class)
        if split in ['val', 'test']:
            n_support = config['data.test_support']
        else:
            n_support = config['data.train_support']
        # n_query (number of query examples per class)
        if split in ['val', 'test']:
            n_query = config['data.test_query']
        else:
            n_query = config['data.train_query']

        # val/test use the un-augmented generator, train uses the augmented one.
        if split in ['val', 'test']:
            y = y_test
            x = x_test
            dg = test_datagen
        else:
            y = y_train
            x = x_train
            dg = train_datagen
        amountPerClass = amountPerTest if split in ['val', 'test'] else amountPerTrain

        # Sort by class so the reshape below groups samples per class.
        i = np.argsort(y)
        x = x[i, :, :, :]
        if config['model.type'] in ['processed']:
            for index in i:
                x[index, :, :, :] = dg.standardize(dg.random_transform(x[index]))

        data = np.reshape(x, (len(uniqueClasses), amountPerClass[0], w, h, c))

        data_loader = DataLoader(data,
                                 n_classes=len(uniqueClasses),
                                 n_way=n_way,
                                 n_support=n_support,
                                 n_query=n_query,
                                 x_dim=(w, h, c))
        ret[split] = data_loader

    return ret