def standardize_data(self):
    """Standardize the eight training series in place with running statistics.

    Each ``self.train_X_series[i]`` (``i`` = 0..7) is replaced by
    ``(X - running_mean) / max(running_std, 1e-6)``. The running mean and
    variance come from ``exponential_running_mean`` and
    ``exponential_running_var_from_demeaned`` with ``factor_new=0.01``.

    The first series is initialised with ``init_block_size=8000``; every
    later series is seeded with the last mean and variance of the series
    processed before it. The last mean and standard deviation of the final
    series are stored in ``self.final_mean`` and ``self.final_std``.
    """
    factor_new = 0.01
    init_block_size = 8000
    eps = 1e-6  # lower bound for the divisor, avoids division by ~0
    # ``range`` instead of ``xrange``: identical on Python 2 for this tiny
    # loop, and does not raise NameError on Python 3.
    for i_series in range(8):
        X_series = self.train_X_series[i_series]
        if i_series == 0:
            means = exponential_running_mean(
                X_series, factor_new=factor_new,
                init_block_size=init_block_size, axis=None)
            demeaned = X_series - means
            stds = np.sqrt(exponential_running_var_from_demeaned(
                demeaned, factor_new,
                init_block_size=init_block_size, axis=None))
        else:
            # Continue from where the previous series ended.
            start_mean = means[-1]
            start_var = stds[-1] * stds[-1]  # variance = std ** 2
            means = exponential_running_mean(
                X_series, factor_new=factor_new,
                start_mean=start_mean, axis=None)
            demeaned = X_series - means
            stds = np.sqrt(exponential_running_var_from_demeaned(
                demeaned, factor_new, start_var=start_var, axis=None))
        self.train_X_series[i_series] = demeaned / np.maximum(stds, eps)
    # for later test standardizing
    self.final_std = stds[-1]
    self.final_mean = means[-1]
def test_exponential_preprocessings():
    """Check the exponential running mean / variance / standardize helpers.

    Every case runs with ``factor_new=0.2``, ``start_mean=0`` and
    ``start_var=1``. The expected values (six decimals) follow::

        mean[t] = 0.2 * x[t] + 0.8 * mean[t - 1]
        var[t]  = 0.2 * (x[t] - mean[t]) ** 2 + 0.8 * var[t - 1]
        standardized[t] = (x[t] - mean[t]) / sqrt(var[t])
    """
    # Each entry: (data, expected running mean, expected running variance,
    # expected standardized data).
    cases = [
        (
            [1, 3, 5, 9, 7, -2],
            # These means are the ones the variance and standardized rows
            # below are derived from (e.g. (3 - 0.76) ** 2 * 0.2 + 0.928 * 0.8
            # == 1.74592).
            [0.200000, 0.760000, 1.608000, 3.086400, 3.869120, 2.695296],
            [0.928000, 1.745920, 3.697869, 9.952428, 9.922424, 12.347100],
            [0.830455, 1.695258, 1.763925, 1.874509, 0.993934, -1.336228],
        ),
        (
            [2, 0, -5, -3, 0, 4],
            [0.400000, 0.320000, -0.744000, -1.195200, -0.956160, 0.035072],
            [1.312000, 1.070080, 4.478771, 4.234478, 3.570431, 6.000475],
            [1.396861, -0.309344, -2.011047, -0.877060, 0.506023, 1.618611],
        ),
        (
            [-3, 5, 8, 7, 4, -2],
            [-0.600000, 0.520000, 2.016000, 3.012800, 3.210240, 2.168192],
            [1.952000, 5.575680, 11.622195, 12.477309, 10.106591, 11.560038],
            [-1.717795, 1.897270, 1.755284, 1.128775, 0.248424, -1.225937],
        ),
        (
            [1, 0, 1, 0, 1, 0],
            [0.200000, 0.160000, 0.328000, 0.262400, 0.409920, 0.327936],
            [0.928000, 0.747520, 0.688333, 0.564437, 0.521188, 0.438459],
            [0.830455, -0.185058, 0.809972, -0.349266, 0.817360, -0.495250],
        ),
    ]
    for raw, exp_run_mean, exp_run_var, exp_standardized in cases:
        data = np.array(raw)
        # Hand the mean helper its own copy, as the original test did.
        run_mean = exponential_running_mean(np.array(data), factor_new=0.2,
                                            start_mean=0)
        demeaned = data - run_mean
        run_var = exponential_running_var_from_demeaned(
            demeaned, factor_new=0.2, start_var=1)
        standardized = exponential_running_standardize(
            data, factor_new=0.2, start_mean=0, start_var=1)
        assert np.allclose(exp_run_mean, run_mean)
        assert np.allclose(exp_run_var, run_var)
        assert np.allclose(exp_standardized, standardized)
def standardize_test_data(self):
    """Standardize the two test series in place with running statistics.

    Each ``self.test_X_series[i]`` (``i`` = 0, 1) is replaced by
    ``(X - running_mean) / max(running_std, 1e-6)``. The running mean and
    variance come from ``exponential_running_mean`` and
    ``exponential_running_var_from_demeaned`` with ``factor_new=0.01``.

    Both series are seeded with ``self.final_mean`` and
    ``self.final_std ** 2``, so those attributes must already be set
    (``standardize_data`` is what sets them).
    """
    factor_new = 0.01
    eps = 1e-6  # lower bound for the divisor, avoids division by ~0
    # The seeds do not change between iterations: every test series starts
    # from the same stored statistics.
    start_mean = self.final_mean
    start_var = self.final_std * self.final_std  # variance = std ** 2
    # ``range`` instead of ``xrange``: identical on Python 2 for this tiny
    # loop, and does not raise NameError on Python 3.
    for i_series in range(2):
        X_series = self.test_X_series[i_series]
        means = exponential_running_mean(
            X_series, factor_new=factor_new, start_mean=start_mean,
            axis=None)
        demeaned = X_series - means
        stds = np.sqrt(exponential_running_var_from_demeaned(
            demeaned, factor_new, start_var=start_var, axis=None))
        self.test_X_series[i_series] = demeaned / np.maximum(stds, eps)
def standardize_data(self):
    """Apply exponential running standardization to the training series.

    Processes ``self.train_X_series[0]`` through ``[7]`` in order and
    overwrites each with ``demeaned / max(std, 1e-6)``. The mean and
    variance are computed by ``exponential_running_mean`` and
    ``exponential_running_var_from_demeaned`` using ``factor_new=0.01``.

    Series 0 is initialised with ``init_block_size=8000``. Each following
    series starts from the last running mean and variance of its
    predecessor. After the loop the final running mean and standard
    deviation are kept in ``self.final_mean`` and ``self.final_std``.
    """
    factor_new = 0.01
    init_block_size = 8000
    eps = 1e-6  # floor on the standard deviation used as divisor
    # ``xrange`` exists only on Python 2; ``range`` behaves the same here
    # and also runs on Python 3.
    for i_series in range(8):
        X_series = self.train_X_series[i_series]
        if i_series == 0:
            means = exponential_running_mean(
                X_series,
                factor_new=factor_new,
                init_block_size=init_block_size,
                axis=None)
            demeaned = X_series - means
            stds = np.sqrt(
                exponential_running_var_from_demeaned(
                    demeaned,
                    factor_new,
                    init_block_size=init_block_size,
                    axis=None))
        else:
            # Seed with the statistics at the end of the previous series.
            start_mean = means[-1]
            start_var = stds[-1] * stds[-1]  # back from std to variance
            means = exponential_running_mean(
                X_series,
                factor_new=factor_new,
                start_mean=start_mean,
                axis=None)
            demeaned = X_series - means
            stds = np.sqrt(
                exponential_running_var_from_demeaned(
                    demeaned,
                    factor_new,
                    start_var=start_var,
                    axis=None))
        standardized = demeaned / np.maximum(stds, eps)
        self.train_X_series[i_series] = standardized
    # for later test standardizing
    self.final_std = stds[-1]
    self.final_mean = means[-1]
def running_standardize_epo(epo, factor_new=0.9, init_block_size=50):
    """Running standardize channelwise.

    Computes an exponential running mean and variance of ``epo.data`` along
    axis 1, subtracts the mean, divides by the square root of the variance
    and returns ``epo.copy(data=...)`` holding the result. ``epo`` itself is
    not assigned to.

    Parameters
    ----------
    epo :
        Object exposing a ``data`` array and a ``copy(data=...)`` method.
        NOTE(review): axis 1 is presumably the running (time/trial) axis;
        confirm against the caller.
    factor_new : float
        Forwarded to the running mean / variance helpers; must lie in
        ``[0, 1]``.
    init_block_size : int
        Forwarded to the running mean / variance helpers.

    Raises
    ------
    AssertionError
        If ``factor_new`` is outside ``[0, 1]``.
    """
    assert factor_new <= 1.0 and factor_new >= 0.0
    running_means = exponential_running_mean(
        epo.data, factor_new=factor_new, init_block_size=init_block_size,
        axis=1)
    # Re-insert the reduced axis so the means broadcast against epo.data.
    running_means = np.expand_dims(running_means, 1)
    demeaned_data = epo.data - running_means
    # Only the demeaned data is passed positionally. A second positional
    # argument would land in the ``factor_new`` slot (see the other callers
    # of this helper) and clash with the ``factor_new=`` keyword.
    running_vars = exponential_running_var_from_demeaned(
        demeaned_data, factor_new=factor_new,
        init_block_size=init_block_size, axis=1)
    running_vars = np.expand_dims(running_vars, 1)
    running_std = np.sqrt(running_vars)
    standardized_epo_data = demeaned_data / running_std
    return epo.copy(data=standardized_epo_data)
def standardize_test_data(self):
    """Apply exponential running standardization to the test series.

    Overwrites ``self.test_X_series[0]`` and ``[1]`` with
    ``demeaned / max(std, 1e-6)``. The mean and variance are computed by
    ``exponential_running_mean`` and
    ``exponential_running_var_from_demeaned`` using ``factor_new=0.01``.

    Each series starts from ``self.final_mean`` and the square of
    ``self.final_std``; both attributes have to exist before this is
    called.
    """
    factor_new = 0.01
    eps = 1e-6  # floor on the standard deviation used as divisor
    # Identical seeds for every test series, so compute them once.
    start_mean = self.final_mean
    start_var = self.final_std * self.final_std  # back from std to variance
    # ``xrange`` exists only on Python 2; ``range`` behaves the same here
    # and also runs on Python 3.
    for i_series in range(2):
        X_series = self.test_X_series[i_series]
        means = exponential_running_mean(
            X_series,
            factor_new=factor_new,
            start_mean=start_mean,
            axis=None)
        demeaned = X_series - means
        stds = np.sqrt(
            exponential_running_var_from_demeaned(
                demeaned,
                factor_new,
                start_var=start_var,
                axis=None))
        standardized = demeaned / np.maximum(stds, eps)
        self.test_X_series[i_series] = standardized
def update_and_standardize(self, samples):
    """Standardize ``samples`` and advance the stored running statistics.

    First call (``self.running_mean`` is ``None``): the mean and variance of
    ``samples`` along axis 0 are stored in ``self.running_mean`` /
    ``self.running_var`` and used to standardize the batch.

    Later calls: the running mean and variance are continued from the
    stored values via ``exponential_running_mean`` and
    ``exponential_running_var_from_demeaned`` with ``self.factor_new``. The
    batch is standardized with those per-row statistics and the last row of
    each is stored for the next call.

    In both cases the divisor is ``max(self.eps, std)``.
    NOTE(review): ``samples`` is presumably laid out with the running axis
    first; confirm against the caller.
    """
    if self.running_mean is None:
        # No history yet: fall back to plain batch statistics.
        self.running_mean = np.mean(samples, axis=0)
        self.running_var = np.var(samples, axis=0)
        centered = samples - self.running_mean
        std_floor = np.maximum(self.eps, np.sqrt(self.running_var))
        return centered / std_floor

    assert self.running_var is not None
    means_over_batch = exponential_running_mean(
        samples, factor_new=self.factor_new, start_mean=self.running_mean)
    centered = samples - means_over_batch
    vars_over_batch = exponential_running_var_from_demeaned(
        centered, factor_new=self.factor_new, start_var=self.running_var)
    result = centered / np.maximum(self.eps, np.sqrt(vars_over_batch))
    # Remember where this batch ended so the next call can continue.
    self.running_mean = means_over_batch[-1]
    self.running_var = vars_over_batch[-1]
    return result
def test_exponential_preprocessings():
    """Verify the exponential running preprocessing helpers on four inputs.

    All checks use ``factor_new=0.2``, ``start_mean=0`` and ``start_var=1``.
    Expected numbers are rounded to six decimals and satisfy::

        mean[t] = 0.2 * x[t] + 0.8 * mean[t - 1]
        var[t]  = 0.2 * (x[t] - mean[t]) ** 2 + 0.8 * var[t - 1]
        standardized[t] = (x[t] - mean[t]) / sqrt(var[t])
    """

    def check(data, exp_run_mean, exp_run_var, exp_standardized):
        # Run all three helpers on one input and compare with expectations.
        data = np.array(data)
        # The mean helper receives its own copy, as in the original test.
        run_mean = exponential_running_mean(
            np.array(data), factor_new=0.2, start_mean=0)
        demeaned = data - run_mean
        run_var = exponential_running_var_from_demeaned(
            demeaned, factor_new=0.2, start_var=1)
        standardized = exponential_running_standardize(
            data, factor_new=0.2, start_mean=0, start_var=1)
        assert np.allclose(np.array(exp_run_mean), run_mean)
        assert np.allclose(np.array(exp_run_var), run_var)
        assert np.allclose(np.array(exp_standardized), standardized)

    check(
        [1, 3, 5, 9, 7, -2],
        # Means matching the variance / standardized rows of this case
        # (e.g. (1 - 0.2) / sqrt(0.928) == 0.830455).
        [0.200000, 0.760000, 1.608000, 3.086400, 3.869120, 2.695296],
        [0.928000, 1.745920, 3.697869, 9.952428, 9.922424, 12.347100],
        [0.830455, 1.695258, 1.763925, 1.874509, 0.993934, -1.336228],
    )
    check(
        [2, 0, -5, -3, 0, 4],
        [0.400000, 0.320000, -0.744000, -1.195200, -0.956160, 0.035072],
        [1.312000, 1.070080, 4.478771, 4.234478, 3.570431, 6.000475],
        [1.396861, -0.309344, -2.011047, -0.877060, 0.506023, 1.618611],
    )
    check(
        [-3, 5, 8, 7, 4, -2],
        [-0.600000, 0.520000, 2.016000, 3.012800, 3.210240, 2.168192],
        [1.952000, 5.575680, 11.622195, 12.477309, 10.106591, 11.560038],
        [-1.717795, 1.897270, 1.755284, 1.128775, 0.248424, -1.225937],
    )
    check(
        [1, 0, 1, 0, 1, 0],
        [0.200000, 0.160000, 0.328000, 0.262400, 0.409920, 0.327936],
        [0.928000, 0.747520, 0.688333, 0.564437, 0.521188, 0.438459],
        [0.830455, -0.185058, 0.809972, -0.349266, 0.817360, -0.495250],
    )