def load_sensor_files(training_data_path, testing_data_path, num_training_samples=1000000,
                      num_training_samples_per_file=100, num_test_samples=10000,
                      num_test_samples_per_file=10, history_length=3, feature_indexes=None,
                      label_indexes=None, shared=True):
    """Load sensor training and testing data from two directories of sample files.

    Files whose path contains "trajectory" and sub-directories are skipped.
    A random subset of files is chosen so that roughly ``num_*_samples`` total
    samples are loaded (``num_*_samples // num_*_samples_per_file`` files,
    at least one). Parsing of the individual files is delegated to
    ``load_data_set`` (defined elsewhere in this project).

    :param training_data_path: directory containing the training sample files
    :param testing_data_path: directory containing the test sample files
    :param num_training_samples: target total number of training samples
    :param num_training_samples_per_file: assumed samples per training file
    :param num_test_samples: target total number of test samples
    :param num_test_samples_per_file: assumed samples per test file
    :param history_length: passed through to ``load_data_set``
    :param feature_indexes: passed through to ``load_data_set``
    :param label_indexes: passed through to ``load_data_set``
    :param shared: if True, wrap each returned array via ``shared_dataset``
        (presumably Theano shared variables -- defined elsewhere; verify)
    :return: (training_data, training_labels, test_data, test_labels)
    """
    from os import listdir
    from os.path import isdir, join
    from random import sample, shuffle

    def _select_files(data_path, num_samples, num_samples_per_file):
        # Pick a random subset of usable files from data_path.
        # os.path.join (rather than string concatenation) keeps this correct
        # whether or not data_path ends with a path separator.
        paths = [join(data_path, name) for name in listdir(data_path)]
        paths = [path for path in paths if not (isdir(path) or "trajectory" in path)]
        # Floor division: under Python 3 '/' would produce a float and make
        # random.sample raise; '//' is identical for ints under Python 2.
        num_files = min(len(paths), max(1, num_samples // num_samples_per_file))
        paths = sample(paths, num_files)
        shuffle(paths)
        return paths

    training_file_paths = _select_files(training_data_path, num_training_samples,
                                        num_training_samples_per_file)
    test_file_paths = _select_files(testing_data_path, num_test_samples,
                                    num_test_samples_per_file)

    print('... loading training data')
    training_data, training_labels = load_data_set(training_file_paths,
                                                   num_training_samples_per_file,
                                                   history_length, feature_indexes,
                                                   label_indexes)
    print('... loading testing data')
    test_data, test_labels = load_data_set(test_file_paths, num_test_samples_per_file,
                                           history_length, feature_indexes, label_indexes)
    print('... ' + str(len(training_data)) + ' training samples loaded')
    print('... ' + str(len(test_data)) + ' test samples loaded')

    if shared:
        training_data = shared_dataset(training_data)
        training_labels = shared_dataset(training_labels)
        test_data = shared_dataset(test_data)
        test_labels = shared_dataset(test_labels)
    return training_data, training_labels, test_data, test_labels
def load_config_data(config_path):
    """Load pre-trained network weights and configuration from config_path.

    Expects, per autoencoder layer, four sorted files prefixed "al" in the
    order: weight matrix W, decoder weight matrix W', bias b, decoder bias b';
    two files prefixed "sl" holding the supervised layer's weight matrix and
    bias; and a "conf.txt" file. Matrix files are comma-separated rows (one
    row per line) and are transposed after parsing; bias files hold a single
    comma-separated line. Each array is wrapped via ``shared_dataset`` from
    the project's ``data_loader`` module (presumably a Theano shared
    variable -- verify against data_loader).

    :param config_path: directory containing the weight/config files
    :return: (autoencoder_weights, (supervised_weights, supervised_bias),
        config_data) where autoencoder_weights is a list of
        (W, b, W', b') tuples and config_data is the raw lines of conf.txt
    """
    from os import listdir
    from os.path import join
    from data_loader import shared_dataset
    from numpy import array

    # Files per autoencoder layer: W, W_prime, b, b_prime (sorted order).
    num_files_per_layer = 4

    file_names = sorted(listdir(config_path))
    # os.path.join keeps this correct whether or not config_path ends with
    # a path separator (the original concatenation required a trailing '/').
    autoencoder_file_paths = [join(config_path, name)
                              for name in file_names if name.startswith("al")]
    supervised_file_paths = [join(config_path, name)
                             for name in file_names if name.startswith("sl")]

    def _read_matrix(path):
        # Parse a comma-separated matrix file and transpose it. Blank lines
        # (e.g. a trailing newline) are skipped -- the original code would
        # raise ValueError on float('') for such lines.
        with open(path, 'r') as matrix_file:
            rows = [line for line in matrix_file.read().split('\n') if line]
        return array([[float(value) for value in line.split(",")] for line in rows]).T

    def _read_vector(path):
        # Parse a single comma-separated line into a 1-D array.
        with open(path, 'r') as vector_file:
            return array([float(value) for value in vector_file.readline().split(",")])

    autoencoder_weights = []
    # Floor division: under Python 3 '/' would yield a float and break range().
    for i in range(len(autoencoder_file_paths) // num_files_per_layer):
        base = num_files_per_layer * i
        weights = _read_matrix(autoencoder_file_paths[base])
        weights_prime = _read_matrix(autoencoder_file_paths[base + 1])
        bias = _read_vector(autoencoder_file_paths[base + 2])
        bias_prime = _read_vector(autoencoder_file_paths[base + 3])

        weights = shared_dataset(weights, name='W')
        bias = shared_dataset(bias, name='b')
        weights_prime = shared_dataset(weights_prime, name='Whid')
        bias_prime = shared_dataset(bias_prime, name='bvis')
        autoencoder_weights.append((weights, bias, weights_prime, bias_prime))

    supervised_weights = shared_dataset(_read_matrix(supervised_file_paths[0]), name='W')
    supervised_bias = shared_dataset(_read_vector(supervised_file_paths[1]), name='b')

    config_file_path = join(config_path, "conf.txt")
    with open(config_file_path, 'r') as config_file:
        config_data = config_file.readlines()

    return autoencoder_weights, (supervised_weights, supervised_bias), config_data