def testGetWaveletFeaturesTest(self):
    """Check that wavelet features can be inverted back to the original data.

    A single row of X is first decomposed and reconstructed directly with
    pywt. Then, for each of several wavelets, every row of X is rebuilt from
    the matrix returned by MetabolomicsUtils.getWaveletFeatures, and the
    reconstruction is asserted to match X to within ``tol``.
    """
    X, X2, Xs, Xopls, YList, df = MetabolomicsUtils.loadData()

    mode = "zpd"
    level = 10
    tol = 10**-6

    # See if we can reproduce one row of the data from its wavelet coefficients
    waveletStr = 'db4'
    C = pywt.wavedec(X[0, :], waveletStr, level=level, mode=mode)
    X0 = pywt.waverec(C, waveletStr, mode)
    self.assertLess(numpy.linalg.norm(X0 - X[0, :]), tol)

    def reconstructSignal(X, Xw, waveletStr, mode, C):
        """Invert every row of Xw with pywt.waverec.

        Each row of Xw is cut into consecutive blocks whose lengths equal
        those of the coefficient arrays in C, i.e. the row is treated as the
        concatenation of one decomposition's coefficient arrays.
        """
        # The block boundaries are the same for every row, so compute them once.
        bounds = numpy.cumsum([0] + [len(coeffs) for coeffs in C])
        Xrecstr = numpy.zeros(X.shape)
        for i in range(Xw.shape[0]):
            rowCoeffs = [Xw[i, start:end] for start, end in zip(bounds[:-1], bounds[1:])]
            Xrecstr[i, :] = pywt.waverec(rowCoeffs, waveletStr, mode)
        return Xrecstr

    # Now do the same for the whole X, once per wavelet
    for waveletStr in ('db4', 'db8', 'haar'):
        # C is only used as a template for the coefficient block lengths
        C = pywt.wavedec(X[0, :], waveletStr, level=level, mode=mode)
        Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)
        Xrecstr = reconstructSignal(X, Xw, waveletStr, mode, C)
        self.assertLess(numpy.linalg.norm(X - Xrecstr), tol, "wavelet " + waveletStr)
def testReconstructSignal(self):
    """Round-trip random data through the wavelet features and back.

    Random data is converted with MetabolomicsUtils.getWaveletFeatures and
    inverted with MetabolomicsUtils.reconstructSignal; the result must match
    the original matrix to within ``tol``.
    """
    numExamples = 100
    numFeatures = 16
    X = numpy.random.rand(numExamples, numFeatures)

    level = 10
    mode = "cpd"
    waveletStr = "db4"

    # Decomposition of the first row, passed to reconstructSignal below.
    # Uses the same `level` as getWaveletFeatures rather than a second literal.
    # NOTE(review): level 10 looks deep for 16 samples -- confirm the
    # installed pywt accepts it without warning/raising.
    C = pywt.wavedec(X[0, :], waveletStr, level=level, mode=mode)
    Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)
    X2 = MetabolomicsUtils.reconstructSignal(X, Xw, waveletStr, mode, C)

    tol = 10**-6
    self.assertLess(numpy.linalg.norm(X - X2), tol)
def testFilterWavelet(self):
    """Check the output of MetabolomicsUtils.filterWavelet on random data.

    filterWavelet(Xw, N) returns a filtered matrix and an index array. The
    test asserts that exactly N indices are returned, that the columns at
    those indices are unchanged, and that every other column is zero (all to
    within ``tol``).
    """
    numExamples = 100
    numFeatures = 16
    X = numpy.random.rand(numExamples, numFeatures)

    level = 10
    mode = "cpd"
    waveletStr = "db4"
    Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)

    N = 10
    Xw2, inds = MetabolomicsUtils.filterWavelet(Xw, N)

    tol = 10**-6
    # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12)
    self.assertEqual(inds.shape[0], N)

    # Retained columns must be passed through untouched
    self.assertLess(numpy.linalg.norm(Xw[:, inds] - Xw2[:, inds]), tol)

    # All remaining columns must have been zeroed
    zeroInds = numpy.setdiff1d(numpy.arange(Xw.shape[1]), inds)
    self.assertLess(numpy.linalg.norm(Xw2[:, zeroInds]), tol)
# Experiment set-up: load the data, derive the PCA and wavelet feature sets
# and start building the command-line parser.
logging.debug("Running from machine " + str(gethostname()))

# Fixed seed for the feature construction below
numpy.random.seed(21)

# Set this process's CPU affinity mask to 0xffffffff using taskset.
# NOTE(review): presumably undoes affinity pinning done by an imported
# library -- confirm this is still needed.
os.system('taskset -p 0xffffffff %d' % os.getpid())

dataDir = PathDefaults.getDataDir() + "metabolomic/"
metaUtils = MetabolomicsUtils()
(X, XStd, X2,
 (XoplsCortisol, XoplsTesto, XoplsIgf1),
 YCortisol, YTesto, YIgf1, ages) = metaUtils.loadData()

#We model 99.1% of the spectrum with 100 eigenvectors
pca = PCA(n_components=100)
XPca = pca.fit_transform(X)

# Wavelet feature matrices, one per wavelet family
mode = "cpd"
level = 10
XwDb4, XwDb8, XwHaar = (
    MetabolomicsUtils.getWaveletFeatures(X, wavelet, level, mode)
    for wavelet in ('db4', 'db8', 'haar')
)

# Feature sets keyed by name; the "log" entry (X2) is currently disabled
dataDict = {
    "raw": X,
    "pca": XPca,
    "Db4": XwDb4,
    "Db8": XwDb8,
    "Haar": XwHaar,
}

# Re-seed from the clock so later code does not reuse the fixed seed above
numpy.random.seed(datetime.datetime.now().microsecond)

parser = argparse.ArgumentParser(description='Run the metabolomics experiments')
#Now try some filtering and plot N versus reconstruction error
# (X and level are expected to be defined earlier in the script.)
Ns = range(0, 700, 50)
waveletStrs = ['haar', 'db4', 'db8']
waveletStrs2 = ['Haar', 'Db4', 'Db8']

# errors[i, j] is the reconstruction error for wavelet i with filter size Ns[j]
errors = numpy.zeros((len(waveletStrs), len(Ns)))
mode = "cpd"

standardiser = Standardiser()
#X = standardiser.centreArray(X)

plotStyles = ['k-', 'k--', 'k-.', 'k:', 'k.']

for i, waveletStr in enumerate(waveletStrs):
    print(i)

    Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)
    # Decomposition of the first row, passed to reconstructSignal below
    C = pywt.wavedec(X[0, :], waveletStr, level=level, mode=mode)

    for j, N in enumerate(Ns):
        Xw2, inds = MetabolomicsUtils.filterWavelet(Xw, N)
        X2 = MetabolomicsUtils.reconstructSignal(X, Xw2, waveletStr, mode, C)
        errors[i, j] = numpy.linalg.norm(X - X2)

#Plot example wavelet after filtering
waveletStr = "haar"
N = 100
Xw = MetabolomicsUtils.getWaveletFeatures(X, waveletStr, level, mode)
C = pywt.wavedec(X[0, :], waveletStr, level=level, mode=mode)
Xw2, inds = MetabolomicsUtils.filterWavelet(Xw, N)