def offline_analysis(data_folder: str = None, parameters: dict = None, alert_finished: bool = True):
    """ Gets calibration data and trains the model in an offline fashion.
        pickle dumps the model into a .pkl file

        Args:
            data_folder(str): folder of the data
                save all information and load all from this folder.
                When falsy, the folder is obtained from
                load_experimental_data().
            parameters(dict): parameters for running offline analysis.
                None (the default) is treated as an empty dict. Keys read:
                    - 'collection_window_after_trial_length' (no default)
                    - 'raw_data_name' (default 'raw_data.csv')
                    - 'down_sampling_rate' (default 2)
                    - 'trigger_file_name' (default 'triggers.txt')
                    - 'static_trigger_offset' (default 0)
                    - 'k_folds' (default 10)
                    - 'offline_analysis_tone' (no default; only read when
                      alert_finished is True)
            alert_finished(bool): whether or not to alert the user
                offline analysis complete

        Returns:
            the model returned by train_pca_rda_kde_model

        How it Works:
        - reads data and information from a .csv calibration file
        - reads trigger information from a .txt trigger file
        - filters data
        - reshapes and labels the data for the training procedure
        - fits the model to the data
            - uses cross validation to select parameters
            - based on the parameters, trains system using all the data
        - pickle dumps model into .pkl file
        - generates and saves offline analysis screen
        - [optional] alert the user finished processing
    """
    # NOTE(review): another `offline_analysis` is defined further down in this
    # file; if both live in the same module, the later definition is the one
    # bound to the name after import. Confirm which one is meant to be used.

    # A fresh dict per call instead of a mutable default shared across calls;
    # this also makes an explicit `parameters=None` safe.
    if parameters is None:
        parameters = {}

    if not data_folder:
        data_folder = load_experimental_data()

    mode = 'calibration'
    trial_length = parameters.get('collection_window_after_trial_length')

    raw_data_name = parameters.get('raw_data_name', 'raw_data.csv')
    raw_dat, _, channels, type_amp, fs = read_data_csv(
        f'{data_folder}/{raw_data_name}')

    log.info(f'Channels read from csv: {channels}')
    log.info(f'Device type: {type_amp}')

    downsample_rate = parameters.get('down_sampling_rate', 2)

    # Remove 60hz noise with a notch filter
    notch_filter_data = notch.notch_filter(
        raw_dat, fs, frequency_to_remove=60)

    # bandpass filter from 2-45hz
    filtered_data = bandpass.butter_bandpass_filter(
        notch_filter_data, 2, 45, fs, order=2)

    # downsample
    data = downsample.downsample(filtered_data, factor=downsample_rate)

    # Process triggers.txt
    triggers_file = parameters.get('trigger_file_name', 'triggers.txt')
    _, t_t_i, t_i, offset = trigger_decoder(
        mode=mode,
        trigger_path=f'{data_folder}/{triggers_file}')

    # The configured static offset is added on top of the decoded one.
    static_offset = parameters.get('static_trigger_offset', 0)
    offset = offset + static_offset

    # Channel map can be checked from raw_data.csv file.
    # read_data_csv already removes the timestamp column.
    channel_map = analysis_channels(channels, type_amp)

    x, y, _, _ = trial_reshaper(t_t_i, t_i, data,
                                mode=mode, fs=fs, k=downsample_rate,
                                offset=offset,
                                channel_map=channel_map,
                                trial_length=trial_length)

    k_folds = parameters.get('k_folds', 10)
    model, auc = train_pca_rda_kde_model(x, y, k_folds=k_folds)

    log.info('Saving offline analysis plots!')

    # After obtaining the model get the transformed data for plotting purposes.
    # The return value is unused; presumably the call refreshes state kept on
    # the model that the plotting code reads -- TODO confirm.
    model.transform(x)
    generate_offline_analysis_screen(
        x, y,
        model=model,
        folder=data_folder,
        down_sample_rate=downsample_rate,
        fs=fs,
        save_figure=True,
        show_figure=False,
        channel_names=analysis_channel_names_by_pos(channels, channel_map))

    log.info('Saving the model!')
    # The value returned alongside the model is embedded in the file name.
    with open(f'{data_folder}/model_{auc}.pkl', 'wb') as output:
        pickle.dump(model, output)

    if alert_finished:
        offline_analysis_tone = parameters.get('offline_analysis_tone')
        play_sound(offline_analysis_tone)

    return model
def _demo_validate_real_data():
    """ Demo: fit the model on one recording and compare it to a second one.

        Two data folders are requested through load_experimental_data(), the
        first for training and the second for testing. Each is read from
        '<folder>/rawdata.csv' and '<folder>/triggers.txt', processed with
        sig_pro and reshaped with trial_reshaper. A single subplot then shows:
        - jittered scatter of the per-class scores for train and test data
        - density curves from the fitted model (solid lines)
        - density curves from a KernelDensityEstimate fitted on the test
          scores (dashed lines)
        The figure is displayed with plt.show(); nothing is returned.
    """
    # NOTE(review): other code in this file unpacks four values from
    # trigger_decoder and (in one place) a (model, auc) pair from
    # train_pca_rda_kde_model -- confirm this demo matches the current helpers.
    ds_rate = 2
    channel_map = [1] * 16 + [0, 0, 1, 1, 0, 1, 1, 1, 0]
    mode = 'calibration'

    def load_trials():
        # Ask for a folder, then read, preprocess and reshape its recording.
        folder = load_experimental_data()
        raw, _, _, _, fs = read_data_csv(folder + '/rawdata.csv')
        processed = sig_pro(raw, fs=fs, k=ds_rate)

        # Get data and labels
        _, t_t_i, t_i = trigger_decoder(
            mode=mode, trigger_loc=folder + '/triggers.txt')
        trials, labels, _, _ = trial_reshaper(
            t_t_i, t_i, processed, mode=mode, fs=fs, k=ds_rate,
            channel_map=channel_map)
        return trials, labels

    def scatter_class(labels, cls, base, marker, legend):
        # Scores of one class, spread vertically below zero with random jitter.
        scores = model.line_el[2][labels == cls]
        heights = base - 0.01 * np.random.random(scores.shape[0])
        ax.plot(scores, heights, marker, label=legend)

    def draw_densities(estimators, styles):
        # One curve per density estimator held by the fitted model's pipeline;
        # indices without an entry in `styles` get an empty format string.
        for idx, _ in enumerate(model.pipeline[2].list_den_est):
            log_dens = estimators[idx].score_samples(x_plot)
            ax.plot(x_plot[:, 0], np.exp(log_dens),
                    styles.get(idx, ''), linewidth=2.0)

    def decorate():
        ax.legend(loc='upper right')
        plt.title('Training Data')
        plt.ylabel('p(e|l)')
        plt.xlabel('scores')

    # Train
    x_train, y_train = load_trials()
    model = train_pca_rda_kde_model(x_train, y_train, k_folds=10)

    fig = plt.figure()
    ax = fig.add_subplot(211)
    last_el = model.line_el[-1]
    x_plot = np.linspace(np.min(last_el), np.max(model.line_el[-1]),
                         1000)[:, np.newaxis]

    scatter_class(y_train, 0, -0.005, 'ro', 'class(-)')
    scatter_class(y_train, 1, -0.005, 'go', 'class(+)')
    draw_densities(model.pipeline[2].list_den_est, {0: 'r-', 1: 'g-'})
    decorate()

    # Test
    x_test, y_test = load_trials()

    # From here on model.line_el[2] is read as the test-data scores.
    model.transform(x_test)
    scatter_class(y_test, 0, -0.01, 'bo', 't_class(-)')
    scatter_class(y_test, 1, -0.01, 'ko', 't_class(+)')

    test_scores = model.line_el[2]
    spread = min(np.std(test_scores), iqr(test_scores) / 1.34)
    bandwidth = 1.06 * spread * np.power(test_scores.shape[0], -0.2)
    test_kde = KernelDensityEstimate(bandwidth=bandwidth)
    test_kde.fit(model.line_el[2], y_test)

    draw_densities(test_kde.list_den_est, {0: 'b--', 1: 'k--'})
    decorate()
    plt.show()
def offline_analysis(data_folder=None, parameters=None):
    """ Gets calibration data and trains the model in an offline fashion.
        pickle dumps the model into a .pkl file

        Args:
            data_folder(str): folder of the data
                save all information and load all from this folder.
                When falsy, the folder is obtained from
                load_experimental_data().
            parameters(dict): optional settings; None (the default) is
                treated as an empty dict. Keys read:
                    - 'raw_data_name' (default 'raw_data.csv')
                    - 'down_sampling_rate' (default 2)
                    - 'triggers_file_name' (default 'triggers.txt')
                    - 'k_folds' (default 10)

        Returns:
            the model returned by train_pca_rda_kde_model

        Duty cycle
        - reads data and information from a .csv calibration file
        - reads trigger information from a .txt trigger file
        - filters data
        - reshapes and labels the data for the training procedure
        - fits the model to the data
            - uses cross validation to select parameters
            - based on the parameters, trains system using all the data
        - pickle dumps model into .pkl file
        - generates and saves offline analysis screen
    """
    # NOTE(review): an earlier `offline_analysis` with a different signature
    # exists in this file; being defined later, this one is what the name
    # resolves to after import. Confirm that is intended.

    # A fresh dict per call instead of a mutable default shared across calls;
    # this also makes an explicit `parameters=None` safe.
    if parameters is None:
        parameters = {}

    if not data_folder:
        data_folder = load_experimental_data()

    mode = 'calibration'

    raw_dat, _, channels, type_amp, fs = read_data_csv(
        data_folder + '/' + parameters.get('raw_data_name', 'raw_data.csv'))

    print(f'Channels read from csv: {channels}')
    print(f'Device type: {type_amp}')

    downsample_rate = parameters.get('down_sampling_rate', 2)
    filtered_data = sig_pro(raw_dat, fs=fs, k=downsample_rate)

    # Process triggers.txt
    triggers_file = parameters.get('triggers_file_name', 'triggers.txt')
    _, t_t_i, t_i, offset = trigger_decoder(
        mode=mode,
        trigger_loc=f"{data_folder}/{triggers_file}")

    # Channel map can be checked from raw_data.csv file.
    # read_data_csv already removes the timestamp column.
    channel_map = analysis_channels(channels, type_amp)

    x, y, _, _ = trial_reshaper(t_t_i, t_i, filtered_data,
                                mode=mode, fs=fs, k=downsample_rate,
                                offset=offset,
                                channel_map=channel_map)

    # Honour the configured fold count; it used to be read and then ignored
    # in favour of a hard-coded 10.
    k_folds = parameters.get('k_folds', 10)
    model = train_pca_rda_kde_model(x, y, k_folds=k_folds)

    print('Saving offline analysis plots!')
    generate_offline_analysis_screen(x, y, model, data_folder)

    print('Saving the model!')
    with open(data_folder + '/model.pkl', 'wb') as output:
        pickle.dump(model, output)
    return model