def process_data_set(data_set, filter_threshold=0.1):
    """Convert one data set to long format, save it, and fit a PCA model.

    Every file found in the ``data_set`` directory is passed through
    ``preprocess``; the mean and variance over the last axis (axis 3) of
    the returned array are stored per file. The means are flattened to one
    row per (file, grid row, grid column) combination, rows whose summed
    value does not exceed ``min(sum) + filter_threshold`` are dropped, and
    the result is written with ``numpy.savez``. A default ``PCA()`` is then
    fitted to the retained rows and stored with ``pickle_save``.

    Args:
        data_set: Used both as the directory that is listed for input files
            and as the key handed to ``misc.folder_names``.
        filter_threshold: Margin added to the smallest row sum; only rows
            whose sum is strictly greater than that value are kept.

    Returns:
        None. Output goes to the ``'npz_file'`` and ``'pca_file'`` paths
        supplied by ``misc.folder_names``.
    """
    # %%Preprocess andrews
    print('#' * 10)
    print(data_set)
    print('#' * 10)

    file_names = misc.folder_names(data_set, 'none')
    pca_file = file_names['pca_file']
    data_file = file_names['npz_file']

    # Reading in all data
    print('---> READING AND PREPROCESSING DATA')
    files = natsort.natsorted(os.listdir(data_set))
    n_locations = len(files)

    # The first file is only used to learn the array layout. The grid size
    # is taken from the data rather than being fixed at 7 x 31.
    # NOTE(review): the first two axes presumably are elevation x azimuth
    # and the third the samples of one template - confirm with preprocess.
    test_data, az, el = preprocess(os.path.join(data_set, files[0]), verbose=False)
    n_rows, n_cols, n_samples = test_data.shape[:3]
    n_templates = n_locations * n_rows * n_cols

    all_mns = numpy.zeros([n_locations, n_rows, n_cols, n_samples])
    all_vrs = numpy.zeros([n_locations, n_rows, n_cols, n_samples])

    for i, file_name in enumerate(files):
        data, _, _ = preprocess(os.path.join(data_set, file_name))
        mns = numpy.mean(data, axis=3)
        vrs = numpy.var(data, axis=3)
        print(data.shape)
        all_mns[i, :, :, :] = mns
        all_vrs[i, :, :, :] = vrs

    # get ID vars
    print('---> DATA2LONG')
    n = numpy.arange(n_locations)
    az_line = az[0, :]
    el_line = el[:, 0]

    # With the default 'xy' indexing meshgrid returns arrays of shape
    # (len(az_line), n_locations, len(el_line)); the transpose reorders
    # them to (n_locations, len(el_line), len(az_line)) to match all_mns.
    locs, azs, els = numpy.meshgrid(n, az_line, el_line)
    locs = numpy.transpose(locs, axes=(1, 2, 0))
    azs = numpy.transpose(azs, axes=(1, 2, 0))
    els = numpy.transpose(els, axes=(1, 2, 0))

    # Reshape data to long format
    long_data = numpy.reshape(all_mns, (n_templates, n_samples))
    long_lcs = numpy.reshape(locs, n_templates)
    long_azs = numpy.reshape(azs, n_templates)
    long_els = numpy.reshape(els, n_templates)
    id_array = numpy.column_stack((long_lcs, long_azs, long_els))

    # Get the variation across all measurements for each sample
    sample_variance = numpy.mean(all_vrs, axis=(0, 1, 2))

    # Select only those templates above threshold
    summed = numpy.sum(long_data, axis=1)
    threshold = numpy.min(summed) + filter_threshold
    include = summed > threshold

    long_data = long_data[include, :]
    id_array = id_array[include, :]
    long_lcs = long_lcs[include]
    long_azs = long_azs[include]
    long_els = long_els[include]

    # Save data
    print('---> SAVING LONG FORMAT')
    numpy.savez(data_file,
                long_data=long_data,
                long_lcs=long_lcs,
                long_azs=long_azs,
                long_els=long_els,
                ids=id_array,
                sample_variance=sample_variance,
                include=include,
                files=files)

    # %%
    print('---> RUN AND SAVE PCA MODEL')
    pca_model = PCA()
    pca_model.fit(long_data)
    pickle_save(pca_file, pca_model)
    print('#' * 10)
import misc
import numpy
import settings
import os
from matplotlib import pyplot

# Plot the cumulative explained-variance curve of the stored PCA model of
# each data set in one figure, then save and show it.
pca_plot_file = os.path.join(settings.figure_folder, 'pca.pdf')

pyplot.style.use(settings.style)
pyplot.figure(figsize=(4, 4))

# One line style per data set; the curves are drawn in this order, which is
# also the order of the legend entries below.
line_styles = {
    'israel': settings.israel_linestyle,
    'royal': settings.royal_linestyle,
}

for data_set in ['israel', 'royal']:
    files = misc.folder_names(data_set, 'lcs')
    pca = misc.pickle_load(files['pca_file'])
    cvar = numpy.cumsum(pca.explained_variance_ratio_)
    pyplot.plot(cvar, color='k', linestyle=line_styles[data_set])

pyplot.legend(['Israel', 'Royal'])
pyplot.xlabel('Nr of PCs')
pyplot.ylabel('Proportion of explained variance')
pyplot.tight_layout()
pyplot.savefig(pca_plot_file)
pyplot.show()
from tensorflow import keras
import misc
import settings
import os

# Render the stored 'israel' / 'azs' model as a diagram in the figure folder.
files = misc.folder_names('israel', 'azs')
model = keras.models.load_model(files['model_file'])
output = os.path.join(settings.figure_folder, 'model.pdf')

# Drawing options, gathered in one place.
plot_options = {
    'show_shapes': True,
    'show_layer_names': False,
    'rankdir': 'TB',
    'expand_nested': True,
    'dpi': 96,
}
keras.utils.plot_model(model, to_file=output, **plot_options)
import scipy.stats as stats
from matplotlib import pyplot
import os
import misc
import numpy
import settings

pyplot.style.use(settings.style)

data_set = 'royal'
loss_function = 'Categorical Cross Entropy'

# File-name tables of the three output dimensions.
files_lcs, files_azs, files_els = (
    misc.folder_names(data_set, dimension) for dimension in ('lcs', 'azs', 'els')
)

# Stored results of each dimension, plus the perfect-memory results that are
# kept with the 'lcs' files.
results_azs, results_els, results_lcs = (
    misc.pickle_load(file_table['results_file'])
    for file_table in (files_azs, files_els, files_lcs)
)
perfect_memory = misc.pickle_load(files_lcs['perfect_memory_file'])

output_file = os.path.join(settings.figure_folder, f'{data_set}_performance.pdf')

# %% Plot errors
# 3 x 3 grid of panels on a 10 x 10 inch figure; start in the top-left one.
fig, axes = pyplot.subplots(nrows=3, ncols=3)
fig.set_figwidth(10)
fig.set_figheight(10)
pyplot.sca(axes[0, 0])
import numpy
import pandas
import scipy.spatial.distance as distance
import misc
import process_functions
import settings

# Run configuration.
data_set = 'israel'
generate_data = False
iterations = 10

print('Running perfect memory for', data_set)
file_names = misc.folder_names(data_set, None)
output_file = file_names['perfect_memory_file']

# Optionally rebuild the long-format data and PCA model first.
if generate_data:
    process_functions.process_data_set(data_set)

# Read prepared data
data = numpy.load(file_names['npz_file'])
corridor_distances = misc.map_lcs_to_distances(data)
pca = misc.pickle_load(file_names['pca_file'])

# The first id column is overwritten with the values returned by
# misc.map_lcs_to_distances.
correct_ids = data['ids']
correct_ids[:, 0] = corridor_distances

# Project the templates with the stored PCA and keep the leading components.
templates = data['long_data']
pca_templates = pca.transform(templates)
n_components = settings.n_components
inputs = pca_templates[:, :n_components]
import os
from matplotlib import pyplot
from tensorflow import keras
import numpy
import pandas
import misc
import process_functions
import settings

# Compare, per data set, how many numbers the stored templates hold with how
# many weights the trained 'lcs' model holds.
# NOTE: numpy is imported above because this script calls numpy.load and a
# numpy array helper; it was missing from the import list.

# Accumulators; nothing is appended to them in the code visible here.
ns = []
ws = []
ws_nb = []
rs = []

for data_set in ['israel', 'royal']:
    selected_dimension = 'lcs'
    file_names = misc.folder_names(data_set, selected_dimension)

    # Template rows times the number of PCA components in the settings.
    data = numpy.load(file_names['npz_file'])
    templates = data['long_data']
    total_n_numbers = templates.shape[0] * settings.n_components

    new_model = keras.models.load_model(file_names['model_file'])
    weights = new_model.get_weights()

    total_n_weights = 0
    total_n_weights_no_bias = 0
    for w in weights:
        # atleast_2d turns a 1-D array into a single row, like the
        # discouraged numpy.matrix did, and accepts arrays above 2-D too.
        m = numpy.atleast_2d(w)
        total_n_weights = total_n_weights + m.size
        # Arrays that end up as a single row are treated as bias vectors
        # and left out of the no-bias total.
        if m.shape[0] > 1:
            total_n_weights_no_bias = total_n_weights_no_bias + m.size
import settings
import os
import misc
import numpy
from matplotlib import pyplot
import smoothn

# Plot the smoothed training loss of every model, one panel per data set and
# one curve per output dimension.
# NOTE: misc and numpy are imported above because the loop below calls
# misc.folder_names, misc.pickle_load and numpy.array; both were missing
# from the import list.
training_history_plot_file = os.path.join(settings.figure_folder, 'history.pdf')

pyplot.style.use(settings.style)
pyplot.figure(figsize=(10,5))

line_styles = {
    'israel': settings.israel_linestyle,
    'royal': settings.royal_linestyle,
}
colors = {
    'azs': settings.azs_color,
    'els': settings.els_color,
    'lcs': settings.lcs_color,
}

# 'israel' goes in the left panel, 'royal' in the right one.
for panel, data_set in enumerate(['israel', 'royal'], start=1):
    pyplot.subplot(1, 2, panel)

    for dimension in ['azs', 'els', 'lcs']:
        files = misc.folder_names(data_set, dimension)
        history_file = files['history_file']
        history = misc.pickle_load(history_file)

        # NOTE(review): linestyle is assigned but not passed to plot() in
        # the visible code; kept in case later code reads it.
        linestyle = line_styles[data_set]
        color = colors[dimension]

        # Smooth the loss trace before drawing it.
        trace = numpy.array(history['loss'])
        trace = smoothn.smoothn(trace, s0=1)[0]
        pyplot.plot(trace, color=color)

    pyplot.ylim([0, 4])
    pyplot.xlabel('Epoch')