# Paths
# NOTE(review): `os` and NUM_SAMPLES are used below but are not defined in this
# section; they must already be in scope (earlier in the file, or possibly via
# the wildcard import) -- confirm.
dataset_folder_path = os.path.join("files", "dataset")

#%% Prepare Data
# Imports. The order is deliberate: anything imported after the wildcard import
# takes precedence over a same-named symbol it may have pulled in.
from utils.preprocessing import *
from data.DataSet import DataSet
from functools import partial
import numpy as np

# Load the dataset, requesting a 0.333 test share and no validation share.
dataset = DataSet()
dataset.load(
    dataset_folder_path,
    test_set_percentage=0.333,
    validation_set_percentage=0,
)

# Preprocessing steps, applied one after another in this order.
dataset.apply(apply_mean_centering)
dataset.apply(apply_unit_distance_normalization)
# Disabled step (pressure normalization):
# dataset.apply(partial(normalize_pressure_value, max_pressure_val=512))
dataset.apply(
    partial(spline_interpolate_and_resample, num_samples=NUM_SAMPLES)
)

# Augmentation: rotated variants for each listed angle, then reversed sequences.
dataset.expand_many(
    partial(rotate_digit, degrees=[5, 10, 15, 45, -5, -10, -15, -45])
)
dataset.expand(reverse_digit_sequence)
# Disabled steps (xy -> derivative conversion, without / with normalization):
# dataset.apply(lambda digit: convert_xy_to_derivative(digit, normalize=False))
# dataset.apply(partial(convert_xy_to_derivative, normalize=True))

#%% Split Train, Valid, Test
# Imports (repeated so that this cell does not depend on the imports above).
import numpy as np
# Not used within this section; presumably needed by a later cell to split
# X_train_valid -- confirm.
from sklearn.model_selection import train_test_split

# Sample data as numpy arrays; train and validation are still combined here.
X_train_valid, X_test = (
    np.array(dataset.train_data),
    np.array(dataset.test_data),
)

# One-hot encode the labels. Four values come back; the third one is discarded.
encoder, train_valid_labels, _, Y_test = dataset.onehot_encode_labels()

# Cast the kept label sets to float32 and densify them (the encoded labels are
# presumably scipy sparse matrices, given the .todense() call -- confirm).
train_valid_labels, Y_test = (
    labels.astype("float32").todense()
    for labels in (train_valid_labels, Y_test)
)
# Report how many samples each split currently holds.
print("Training Data Len:", len(dataset.train_data))
print("Validation Data Len:", len(dataset.valid_data))
print("Test Data Len:", len(dataset.test_data))

#%%
# Preprocessing / augmentation settings for this cell.
NUM_SAMPLES = 50
ANGLES_TO_ROTATE = [5, 10, 15, 45, -5, -10, -15, -45]

# Imports. Kept after the constants on purpose: a wildcard import can rebind
# names that are already defined, so moving it could change which values win.
from utils.preprocessing import *
from functools import partial

# Preprocessing steps, applied one after another in this order.
dataset.apply(apply_mean_centering)
# Disabled step (unit-distance normalization):
# dataset.apply(apply_unit_distance_normalization)
dataset.apply(
    partial(spline_interpolate_and_resample, num_samples=NUM_SAMPLES)
)

# Augmentation: rotated variants for each configured angle, then reversed
# sequences.
dataset.expand_many(partial(rotate_digit, degrees=ANGLES_TO_ROTATE))
dataset.expand(reverse_digit_sequence)

# Report the split sizes again, now that the expansion steps have run.
print("Training Data Len:", len(dataset.train_data))
print("Validation Data Len:", len(dataset.valid_data))
print("Test Data Len:", len(dataset.test_data))

#%%
import numpy as np

# Sample data of every split as numpy arrays.
X_train, X_valid, X_test = (
    np.array(dataset.train_data),
    np.array(dataset.valid_data),
    np.array(dataset.test_data),
)

# One-hot encode the labels; the encoder is returned alongside the encoded
# train, validation and test labels.
encoder, Y_train, Y_valid, Y_test = dataset.onehot_encode_labels()