def preprocess_all_mk3(mode='train', wind=3, butter_order=4, disp=True):
    """Preprocess every CSV data file for the given dataset split.

    Pipeline per file:
      1. subtract the per-sensor training mean (utils.SENSOR_MEAN),
      2. divide by the per-sensor training std (utils.SENSOR_STD),
      3. moving-average filter of width ``wind`` to damp high-frequency noise,
      4. one band-pass (utils.butter_apply) per entry of utils.FREQUENCY_BANDS,
    then the per-band outputs are concatenated along the feature axis and
    saved next to the source CSV as ``<name>_mk3_wind<w>_fmask<m>.npy``.

    Parameters:
        mode (str): dataset split forwarded to io.get_file_list
            (e.g. 'train' or 'test').
        wind (int): moving-average window length, in samples.
        butter_order (int): currently unused -- kept for API compatibility.
        disp (bool): when True, per-file progress is printed via printflush.
    """
    csvlist = io.get_file_list(mode=mode, fullpath=True)
    # Progress printer that becomes a no-op when disp is False.
    pif = lambda msg: printflush(msg) if disp else None
    pif('MK3 preprocessing for ' + mode + ' data\n')
    for fullpath in csvlist:
        t0 = time()
        fpath, fname = os.path.split(fullpath)
        # Drop the first column (presumably an index/timestamp -- TODO confirm)
        # and force float so the in-place -= and /= below cannot fail when the
        # CSV parses to an integer dtype.
        data = pd.read_csv(fullpath).values[:, 1:].astype(np.float64)
        pif('Processing ' + fname + ' -- ' + str(data.shape[0]) + ' samples...')
        # Removes the mean of each sensor
        data -= utils.SENSOR_MEAN
        # Scale the data with the standard deviation from the training data
        data /= utils.SENSOR_STD
        # Moving average, to remove outliers
        data = utils.mov_avg(data, wind, axis=0)  # TODO
        # Filter the data: one band-passed copy per frequency band.
        brain_list = []
        freq_mask = 0
        # .values() instead of the Python-2-only .itervalues() so this also
        # runs under Python 3.
        for flo, fhi in utils.FREQUENCY_BANDS.values():
            brain_list.append(utils.butter_apply(data, low=flo, high=fhi))
            # NOTE(review): freq_mask is overwritten every iteration (only the
            # last band survives) and the precedence reads as flo + (fhi*10),
            # not (flo+fhi)*10.  Kept byte-identical because the value is
            # baked into output filenames -- confirm intent before changing.
            freq_mask = int(round(flo + fhi * 10))
        del data  # Free some memory!
        final_data = np.concatenate(brain_list, axis=1)
        # Save preprocessed data and print stuff to console
        str_wind = 'FULL' if wind == final_data.shape[0] else str(wind)
        final_fname = fullpath[:-4] + '_mk3_wind' + str_wind + '_fmask' + str(freq_mask)
        np.save(final_fname, final_data)
        del brain_list, final_data  # Free some memory for the next datafile
        pif("%.3f" % (time() - t0) + " s\n")
def preprocess_sample(X_raw, normalize=True, filters=None, window_size=300, downsample=1, shuffle=True):
    """Extract band-power features from one raw recording.

    The signal is (optionally) normalized, sliced into windows, and for each
    window the mean of each band-pass-filtered version is taken as a feature.

    Parameters:
        X_raw (ndarray): raw signal, 1-D (single channel) or 2-D
            (samples x channels -- TODO confirm axis convention).
        normalize (bool): apply utils.normalize to the input first.
        filters (iterable of str or None): names of bands to use, keys of
            utils.FREQUENCY_BANDS.  None (default) means all bands.
        window_size (int): window length forwarded to the window generator.
        downsample (int): downsampling factor forwarded to the generator.
        shuffle (bool): currently unused -- kept for API compatibility.

    Returns:
        ndarray: one feature row per window; row length is
        len(filters) * n_channels for 2-D input, len(filters) for 1-D.
    """
    # Resolve the default here rather than in the signature: the previous
    # default, utils.FREQUENCY_BANDS.keys(), was evaluated once at import
    # time and on Python 3 is a live view of the dict -- a shared-mutable-
    # default hazard.  A None sentinel gives identical behavior safely.
    if filters is None:
        filters = list(utils.FREQUENCY_BANDS.keys())
    if normalize:
        X_raw = utils.normalize(X_raw)
    # Loop-invariant: input rank decides both the window generator and how
    # each filtered window is reduced to features.
    multichannel = len(X_raw.shape) > 1
    if multichannel:
        wg = utils.window_generator_ND(X_raw, window_size=window_size, downsample=downsample)
    else:
        wg = utils.window_generator_1D(X_raw, window_size=window_size, downsample=downsample)
    features_extracted = []
    for windowed_data in wg:
        data_point = []
        for filter_name in filters:
            low, high = utils.FREQUENCY_BANDS[filter_name]
            filtered = utils.butter_apply(windowed_data, low, high)
            if multichannel:
                # Mean over time per channel -> one feature per channel.
                data_point.extend(np.mean(filtered, axis=0).tolist())
            else:
                # Single channel -> a single scalar feature.
                data_point.append(np.mean(filtered))
        features_extracted.append(data_point)
    return np.array(features_extracted)