def main():
    # Define matrix dimensions
    Nobs = 1000    # Number of observations
    Nvars = 50000  # Number of variables
    Ncomp = 100    # Number of components

    # Simulated true sources
    S_true = np.random.logistic(0, 1, (Ncomp, Nvars))
    # Simulated true mixing
    A_true = np.random.normal(0, 1, (Nobs, Ncomp))
    # X = AS
    X = np.dot(A_true, S_true)
    # add some noise
    X = X + np.random.normal(0, 1, X.shape)

    # apply ICA on X and ask for Ncomp components
    model = ica1(Ncomp)
    start = time.time()
    A, S = model.fit(X)
    total = time.time() - start
    print('total time: {}'.format(total))

    # check whether our estimates are accurate:
    # correlate each estimate with its true counterpart and take the best
    # absolute match per component, averaged over components
    aCorr = np.abs(np.corrcoef(A.T, A_true.T)[:Ncomp, Ncomp:]).max(axis=0).mean()
    sCorr = np.abs(np.corrcoef(S, S_true)[:Ncomp, Ncomp:]).max(axis=0).mean()

    print("Accuracy of estimated sources: %.2f" % sCorr)
    print("Accuracy of estimated mixing: %.2f" % aCorr)
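# The accuracy check above leans on a corrcoef trick that is easy to misread:
# np.corrcoef stacks the rows of its two arguments, so on two (Ncomp, N) inputs
# it returns a (2*Ncomp, 2*Ncomp) matrix whose [:Ncomp, Ncomp:] block holds the
# estimated-vs-true cross-correlations. A minimal standalone sketch of that
# matching step (toy data, not taken from the snippet above):
import numpy as np

rng = np.random.default_rng(0)
true = rng.normal(size=(3, 500))                    # 3 "true" components
est = true[[2, 0, 1]] * np.array([[1], [-1], [1]])  # permuted, one sign flipped

cross = np.corrcoef(est, true)[:3, 3:]    # rows: estimates, columns: true components
score = np.abs(cross).max(axis=0).mean()  # best |corr| per true component, averaged
print(score)                              # ~1.0: recovery is exact up to order and sign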
def main():
    # Define matrix dimensions
    Nobs = 1000    # Number of observations
    Nvars = 50000  # Number of variables
    Ncomp = 100    # Number of components

    # Simulated true sources
    S_true = np.random.logistic(0, 1, (Ncomp, Nvars))
    # Simulated true mixing
    A_true = np.random.normal(0, 1, (Nobs, Ncomp))
    # X = AS
    X = np.dot(A_true, S_true)
    # add some noise
    X = X + np.random.normal(0, 1, X.shape)

    # apply ICA on X and ask for Ncomp components
    model = ica1(Ncomp)
    start = time.time()
    A, S = model.fit(X)
    total = time.time() - start
    # print('total time: {}'.format(total))

    # check whether our estimates are accurate:
    # correlate each estimate with its true counterpart and take the best
    # absolute match per component, averaged over components
    aCorr = np.abs(np.corrcoef(A.T, A_true.T)[:Ncomp, Ncomp:]).max(axis=0).mean()
    sCorr = np.abs(np.corrcoef(S, S_true)[:Ncomp, Ncomp:]).max(axis=0).mean()
def test_ICA_infomax_clean(self):
    start = time.time()
    A, S = ica.ica1(self.clean_data, self.NCOMP)
    end = time.time()
    print('\ttime: {:.2f}'.format(end - start))
    # Check the output has the right dimensions
    self.assertEqual(A.shape, (self.NSUB, self.NCOMP))
    self.assertEqual(S.shape, (self.NCOMP, self.NVOX))
    idx = find_sources_order(self.sources, S)
    S = S[idx, :]
    A = A[:, idx]
    # Check the accuracy of the output
    self.assertGreater(mean_corr(self.sources, S), 0.95)
    self.assertGreater(mean_corr(self.loading, A), 0.95)
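# find_sources_order and mean_corr come from the test module and are not shown
# in the snippet. Plausible stand-ins, assuming the same cross-correlation
# matching used in the other tests here (hypothetical sketches, not the
# project's actual helpers):
import numpy as np

def find_sources_order(true_s, est_s):
    # For each true source (row), the index of the best-matching estimate.
    n = true_s.shape[0]
    cross = np.corrcoef(est_s, true_s)[:n, n:]  # rows: estimated, cols: true
    return np.abs(cross).argmax(axis=0)

def mean_corr(x, y):
    # Mean absolute correlation of corresponding components. Components are
    # assumed to lie along the shorter axis (rows for sources, columns for
    # mixing matrices), so tall matrices are transposed first.
    if x.shape[0] > x.shape[1]:
        x, y = x.T, y.T
    return np.mean([abs(np.corrcoef(a, b)[0, 1]) for a, b in zip(x, y)])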
def test_mv_normal(self):
    # three-dimensional input samples
    old_data, labels = synthetic_data()
    gen = DataGenerator(old_data, n_components=3, n_samples=1000,
                        n_batches=10, method='normal')
    new_data = [x for x in gen]
    # Check that there are enough samples
    for i in range(10):
        self.assertEqual(new_data[i].shape[0], 1000)
    old_model = (gen.parameters['sample_mean'],
                 gen.parameters['sample_cov'])
    model = ica1(3)
    # for each data batch
    for i in range(10):
        new_A, new_S = model.fit(new_data[i])
        # Reorder components
        c_SS = corrcoef(new_S, gen.sources)[3:, :3]
        source_sim = abs(c_SS).max(axis=1)
        # Check sources are similar
        self.assertTrue(all(source_sim > 0.95))
        order = abs(c_SS).argmax(axis=1)
        signs = array([sign(x[order[n]]) for n, x in enumerate(c_SS)])
        new_S = new_S[order, :] * signs.reshape((-1, 1))
        new_A = new_A[:, order] * signs.reshape((1, -1))
        new_model = (new_A.mean(axis=0), cov(new_A, rowvar=0))
        # Check that the resulting new mixing has a similar mean and cov
        error = norm(abs(new_model[0] - old_model[0])) / 3
        self.assertTrue(error < 0.01)
        error = norm(abs(new_model[1] - old_model[1]).ravel()) / 9
        self.assertTrue(error < 0.01)
def test_mv_rejection(self):
    # three-dimensional input samples
    old_data, labels = synthetic_data()
    gen = DataGenerator(old_data, n_components=3, n_samples=1000,
                        n_batches=10, method='rejective')
    new_data = [x for x in gen]
    for i in range(10):
        self.assertEqual(new_data[i].shape[0], 1000)
    old_model = gen.parameters['sample_hist']
    model = ica1(3)
    # for each data batch
    for i in range(10):
        new_A, new_S = model.fit(new_data[i])
        # Reorder components
        c_SS = corrcoef(new_S, gen.sources)[3:, :3]
        source_sim = abs(c_SS).max(axis=1)
        # Check sources are similar
        self.assertTrue(all(source_sim > 0.95))
        order = abs(c_SS).argmax(axis=1)
        signs = array([sign(x[order[n]]) for n, x in enumerate(c_SS)])
        new_S = new_S[order, :] * signs.reshape((-1, 1))
        new_A = new_A[:, order] * signs.reshape((1, -1))
        new_model = [histogram(column, density=True, bins=20)
                     for column in new_A.T]
        # Check that the resulting new mixing has a similar histogram
        for j in range(len(new_model)):
            sim = abs(corrcoef(new_model[j][0], old_model[j][0]))
            self.assertTrue(sim[0, 1] > 0.8,
                            'similarity {} is too low'.format(sim))
# Import ica function
from ica import ica1
import numpy as np
import matplotlib.pyplot as plt

# Define matrix dimensions
Nobs = 100     # Number of observations
Nvars = 10000  # Number of variables
Ncomp = 2      # Number of components

# Simulated true sources
S_true = np.random.logistic(0, 1, (Ncomp, Nvars))
# Simulated true mixing
A_true = np.random.normal(0, 1, (Nobs, Ncomp))
# X = AS
X = np.dot(A_true, S_true)
# add some noise
X = X + np.random.normal(0, 5, X.shape)

# apply ICA on X and ask for 2 components
A, S = ica1(X, 2)

# check whether our estimates are accurate:
# correlate each estimate with its true counterpart and take the best
# absolute match per component, averaged over components
aCorr = np.abs(np.corrcoef(A.T, A_true.T)[:Ncomp, Ncomp:]).max(axis=0).mean()
sCorr = np.abs(np.corrcoef(S, S_true)[:Ncomp, Ncomp:]).max(axis=0).mean()

print("Accuracy of estimated sources: %.2f" % sCorr)
print("Accuracy of estimated mixing: %.2f" % aCorr)
def sliding_window_ica(data, win, step, comp):
    # runs Infomax ICA on the chunked array
    C = create_chunked_array(data, win, step)
    output = ica1(np.asarray(C), comp)
    return output[1]  # returns the sources array
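# create_chunked_array is not defined in the snippet above. A plausible
# stand-in, assuming it slices `data` into windows of length `win` advanced by
# `step` samples along the first axis (hypothetical sketch):
import numpy as np

def create_chunked_array(data, win, step):
    data = np.asarray(data)
    starts = range(0, len(data) - win + 1, step)
    return [data[s:s + win] for s in starts]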
def eawica(sample, constants, wavelet='db4', low_k=5, up_k=95, low_r=5,
           up_r=95, alpha=6):
    n_epochs = constants.SECONDS
    n_channels = sample.shape[0]
    n_samples = constants.WINDOW
    fb = filter_bank_class(constants)

    # COMPUTE WAVELET DECOMPOSED wcs_delta
    wcs, wcs_beta, wcs_gamma = [], [], []
    for i in range(n_channels):
        GAMMA, BETA, ALPHA, THETA, DELTA = fb.eawica_wavelet_band_pass(
            sample[i, :], wavelet)
        pos = i * 3
        wcs.append([GAMMA, pos])
        wcs.append([BETA, pos + 1])
        wcs.append([ALPHA, pos + 2])
        wcs_beta.append(BETA)
        wcs_gamma.append(GAMMA)

    # CHECKING FIRST CONDITION OVER ALL wcs_delta
    kurt_list = []
    renyi_list = []
    for i in range(len(wcs)):
        # -- kurtosis --
        k = kurtosis(wcs[i][0])
        kurt_list.append(k)
        # -- renyi entropy --
        pdf = np.histogram(wcs[i][0], bins=10)[0] / wcs[i][0].shape[0]
        r = entropy.renyientropy(pdf, alpha=alpha, logbase=2, measure='R')
        renyi_list.append(r)
    # -- scaling --
    kurt_list_scaled = zscore(kurt_list)
    renyi_list_scaled = zscore(renyi_list)
    # -- thresholds --
    low_kurt_threshold, up_kurt_threshold = np.percentile(
        kurt_list_scaled, low_k), np.percentile(kurt_list_scaled, up_k)
    low_renyi_threshold, up_renyi_threshold = np.percentile(
        renyi_list_scaled, low_r), np.percentile(renyi_list_scaled, up_r)
    cond_11 = np.logical_or(kurt_list_scaled > up_kurt_threshold,
                            kurt_list_scaled < low_kurt_threshold)
    cond_12 = np.logical_or(renyi_list_scaled > up_renyi_threshold,
                            renyi_list_scaled < low_renyi_threshold)
    cond_1 = cond_11 + cond_12

    # SELECT wcs_delta MARKED AS CONTAINING ARTIFACTUAL INFORMATION
    signals2check = np.zeros((np.sum(cond_1), n_samples + 1))
    indices = np.where(cond_1)[0]
    for indx in range(len(indices)):
        if cond_1[indices[indx]]:
            signals2check[indx, :-1] = wcs[indices[indx]][0]
            signals2check[indx, -1] = wcs[indices[indx]][1]

    # ICA INFOMAX DECOMPOSITION OF MARKED signals TO OBTAIN ICs
    n_components = signals2check.shape[0]
    A, S, W = ica1(signals2check[:, :-1], n_components)

    # CHECK SECOND CONDITION OVER EACH EPOCH ON WICs
    control_k = np.zeros((S.shape[0], n_epochs))
    control_r = np.zeros((S.shape[0], n_epochs))
    for indx1 in range(S.shape[0]):
        for indx2 in range(n_epochs):
            ini = int(indx2 * S.shape[1] / n_epochs)
            end = int(ini + S.shape[1] / n_epochs)
            if end + 1 == S.shape[1]:
                end += 1
            epoch = S[indx1, ini:end]
            control_k[indx1, indx2] = kurtosis(epoch)
            pdf = np.histogram(epoch, bins=10)[0] / epoch.shape[0]
            r = entropy.renyientropy(pdf, alpha=alpha, logbase=2, measure='R')
            control_r[indx1, indx2] = r
    table = np.zeros((S.shape[0], n_epochs))
    for indx1 in range(n_epochs):
        control_k[:, indx1] = preprocessing.scale(control_k[:, indx1])
        control_r[:, indx1] = preprocessing.scale(control_r[:, indx1])
    table = np.logical_or(control_k > up_kurt_threshold,
                          control_k < low_kurt_threshold) + np.logical_or(
        control_r > up_renyi_threshold, control_r < low_renyi_threshold)

    # ZEROING THOSE EPOCHS IN WICs MARKED AS ARTIFACTUAL EPOCHS
    for indx1 in range(S.shape[0]):
        for indx2 in range(n_epochs):
            if table[indx1, indx2]:
                ini = indx2 * int(n_samples / n_epochs)
                end = ini + int(n_samples / n_epochs)
                # epochs zeroing
                S[indx1, ini:end] = 0

    # wcs_delta RECONSTRUCTION FROM WICs
    reconstructed = A.dot(S)
    for i in range(reconstructed.shape[0]):
        wcs[int(signals2check[i, -1])][0] = reconstructed[i, :]
    data_cleaned = np.zeros(sample.shape)
    for i in range(n_channels):
        pos = i * 3
        data_cleaned[i, :] = (wcs[pos][0] + wcs[pos + 1][0] + wcs[pos + 2][0]
                              + wcs_beta[i] + wcs_gamma[i])
    return data_cleaned
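# entropy.renyientropy above appears to compute the Renyi entropy of order
# alpha; for a discrete distribution p with alpha != 1 it is
#   H_alpha(p) = log2(sum_i p_i**alpha) / (1 - alpha)
# A minimal reference implementation of that formula (not the library call the
# snippet uses):
import numpy as np

def renyi_entropy(pdf, alpha=6, base=2):
    p = np.asarray(pdf, dtype=float)
    p = p[p > 0]      # empty histogram bins contribute nothing
    p = p / p.sum()   # renormalize to a proper distribution
    return np.log(np.sum(p ** alpha)) / ((1 - alpha) * np.log(base))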
import numpy as np
from ica import ica1
import time

NSUB = 1000
NCOMP = 100
NVOX = 50000

true_a = np.random.normal(0, 1, NSUB * NCOMP).reshape((NSUB, NCOMP))
true_s = np.random.logistic(0, 1, NCOMP * NVOX).reshape((NCOMP, NVOX))
true_x = np.dot(true_a, true_s) + np.random.normal(0, 1, NSUB * NVOX).reshape(
    (NSUB, NVOX))

start = time.time()
A, S = ica1(true_x, NCOMP)
end = time.time()
print(end - start)
def eawica(sample, constants, wavelet='db4', low_k=5, up_k=95, low_r=5,
           up_r=95, alpha=6):
    # Receives the sample and the constants from Data_Manager; the threshold
    # values are also fixed here.
    n_channels = sample.shape[0]  # first element of shape: rows = channels = sensors
    n_epochs = constants.SECONDS
    n_samples = constants.WINDOW
    fb = filter_bank_class(constants)  # hands the constants to Filter_Bank

    # Decompose the signal into frequency bands with the EAWICA filter bank,
    # collecting the results in three separate lists.
    wcs, wcs_beta, wcs_gamma = [], [], []
    # For each channel, decompose the signal into frequency bands and keep
    # GAMMA, BETA and ALPHA.
    for i in range(n_channels):
        GAMMA, BETA, ALPHA, THETA, DELTA = fb.eawica_wavelet_band_pass(
            sample[i, :], wavelet)  # band-pass EAWICA filter defined in Filter_Bank
        # wcs has the form
        # [[GAMMA1, 0], [BETA1, 1], [ALPHA1, 2], [GAMMA2, 3], [BETA2, 4], [ALPHA2, 5], ...]
        pos = i * 3
        wcs.append([GAMMA, pos])
        wcs.append([BETA, pos + 1])
        wcs.append([ALPHA, pos + 2])
        # wcs_beta and wcs_gamma have the form [BETA1, BETA2, ...] and
        # [GAMMA1, GAMMA2, ...]
        wcs_beta.append(BETA)
        wcs_gamma.append(GAMMA)

    # CHECKING FIRST CONDITION OVER ALL wcs_delta
    kurt_list = []
    renyi_list = []
    for i in range(len(wcs)):
        # Compute the kurtosis: E[((x - mean) / std)**4]
        k = kurtosis(wcs[i][0])  # kurtosis of the first element of each wcs pair
        kurt_list.append(k)
        # Compute the Renyi entropy: take the counts of a 10-bin histogram and
        # divide by the number of samples in that wcs component
        pdf = np.histogram(wcs[i][0], bins=10)[0] / wcs[i][0].shape[0]
        r = entropy.renyientropy(pdf, alpha=alpha, logbase=2, measure='R')
        renyi_list.append(r)
    # Scaling: map the values onto a standard normal distribution
    kurt_list_scaled = zscore(kurt_list)
    renyi_list_scaled = zscore(renyi_list)
    # Upper and lower thresholds: percentiles, i.e. the value below which the
    # given percentage of the data falls
    low_kurt_threshold, up_kurt_threshold = np.percentile(
        kurt_list_scaled, low_k), np.percentile(kurt_list_scaled, up_k)
    low_renyi_threshold, up_renyi_threshold = np.percentile(
        renyi_list_scaled, low_r), np.percentile(renyi_list_scaled, up_r)
    # Element-wise OR between the conditions to flag extreme values, i.e.
    # those outside the percentile band
    cond_11 = np.logical_or(kurt_list_scaled > up_kurt_threshold,
                            kurt_list_scaled < low_kurt_threshold)
    cond_12 = np.logical_or(renyi_list_scaled > up_renyi_threshold,
                            renyi_list_scaled < low_renyi_threshold)
    cond_1 = cond_11 + cond_12  # True/False per element across both conditions

    # SELECT wcs_delta MARKED AS CONTAINING ARTIFACTUAL INFORMATION
    # Zero matrix: rows = number of flagged elements, columns = samples + 1
    signals2check = np.zeros((np.sum(cond_1), n_samples + 1))
    indices = np.where(cond_1)[0]  # indices of the elements flagged True
    for indx in range(len(indices)):
        if cond_1[indices[indx]]:
            signals2check[indx, :-1] = wcs[indices[indx]][0]
            signals2check[indx, -1] = wcs[indices[indx]][1]

    # ICA INFOMAX DECOMPOSITION OF MARKED signals TO OBTAIN ICs
    n_components = signals2check.shape[0]
    A, S, W = ica1(signals2check[:, :-1], n_components)

    # CHECK SECOND CONDITION OVER EACH EPOCH ON WICs
    control_k = np.zeros((S.shape[0], n_epochs))
    control_r = np.zeros((S.shape[0], n_epochs))
    for indx1 in range(S.shape[0]):
        for indx2 in range(n_epochs):
            ini = int(indx2 * S.shape[1] / n_epochs)
            end = int(ini + S.shape[1] / n_epochs)
            if end + 1 == S.shape[1]:
                end += 1
            epoch = S[indx1, ini:end]
            control_k[indx1, indx2] = kurtosis(epoch)
            pdf = np.histogram(epoch, bins=10)[0] / epoch.shape[0]
            r = entropy.renyientropy(pdf, alpha=alpha, logbase=2, measure='R')
            control_r[indx1, indx2] = r
    table = np.zeros((S.shape[0], n_epochs))
    for indx1 in range(n_epochs):
        control_k[:, indx1] = preprocessing.scale(control_k[:, indx1])
        control_r[:, indx1] = preprocessing.scale(control_r[:, indx1])
    table = np.logical_or(control_k > up_kurt_threshold,
                          control_k < low_kurt_threshold) + np.logical_or(
        control_r > up_renyi_threshold, control_r < low_renyi_threshold)

    # ZEROING THOSE EPOCHS IN WICs MARKED AS ARTIFACTUAL EPOCHS
    for indx1 in range(S.shape[0]):
        for indx2 in range(n_epochs):
            if table[indx1, indx2]:
                ini = indx2 * int(n_samples / n_epochs)
                end = ini + int(n_samples / n_epochs)
                # epochs zeroing
                S[indx1, ini:end] = 0

    # wcs_delta RECONSTRUCTION FROM WICs
    reconstructed = A.dot(S)
    for i in range(reconstructed.shape[0]):
        wcs[int(signals2check[i, -1])][0] = reconstructed[i, :]
    data_cleaned = np.zeros(sample.shape)
    for i in range(n_channels):
        pos = i * 3
        data_cleaned[i, :] = (wcs[pos][0] + wcs[pos + 1][0] + wcs[pos + 2][0]
                              + wcs_beta[i] + wcs_gamma[i])
    return data_cleaned