/
nonlinanharchizer.py
executable file
·287 lines (231 loc) · 13 KB
/
nonlinanharchizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# NonlinAnharChizer: Nonlinearity and Anharmonicity Characterizer, is a script that evaluates the anharmonic shape
# of (quasi-)periodic input signals and measures their degree of nonlinearity derived from it. To this effect, it uses
# a multiresolution representation based on P. Hanusse's /Nonlinear Trigonometric/ functions from which it derives the
# dual and projects it on the signal; this is done at each pixel. Then, from it, it calculates a synthetic sparsity
# (which describes amplitude fluctuations, analogous to the shimmer of Fourier representation), the optimal shape
# parameter (which represents departure from Fourier) and a nonlinearity measure (as the gain in sparsity from Fourier);
# all these calculated for windows, which should be at least as large as the fundamental period.
# The projectors are calculated once and then applied to the signal, so that we obtain the representation coefficients directly.
# The shape parameter itself is window-varying, so it needs an optimization routine at the window level (a huge improvement
# from running optimizers at the pixel level!). This is the slowest part of the processing. A LUT approach could help here if
# a big corpus of files needed to be batch processed, run in a big-mem system. By default, and in the absence of tolerance-based
# stopping criteria, we let users define a max number of iterations from the command line. A bit of testing shows when outputs
# stop moving any significantly.
# There are two command line parameters: "Window length" and "maximum number of iterations", which are the ones likely requiring
# a rather finer tuning. Internally, there is also the output interval (by default, the same as the window length, i.e., no overlap)
# and the size of the multiresolution pyramid, N. It is important that N is large enough to capture the larger-scale dynamics,
# while simultaneously small enough to avoid a sparsity being dominated by the nearly null low-frequency coefficients, which would
# unfairly favor Fourier. N=32 seems to work well for 16kHz speech; check the matplots of lvlEnergy if you want to try other values.
# The representation basis is a multiresolution frame based on P. Hanusse's psin_1 and pcos_1 functions (for which the definition
# series have closed expressions). Like with Fourier series', it is possible to use both psin and pcos, or just either. To complete
# the basis at finite resolution, we add a unit function. The effect of this on the representation gets canceled if it is tuned to
# the same shape of the analysis window. On the absence of any prior information about the fundamental period, a Hanning window
# enforces the required periodicity; therefore, we add a Hanning kernel as the unit function for representation, which will be taking
# the bulk of the /approximation/ component. Doing this, then the cosine-like functions are worse suited than the sine-like functions
# for representing the /details/ component. In consequence, in this case, we will be using by default psin_1 exclusively (pcos_1 is
# also implemented nevertheless, for testing purposes).
########################
# CODE STARTS HERE #
########################
# A handful of standard modules required:
import numpy as np
import matplotlib.pyplot as plt
from scipy import optimize
from scipy.io import wavfile
import argparse
# Parsing of command line arguments
parser = argparse.ArgumentParser(description='Evaluate anharmonic shape of input files and characterize a nonlinearity measure from it.')
def reasonableWindowType(x):
if 1 <= int(x) <= 1E5:
return int(x)
else:
raise argparse.ArgumentTypeError("Window should be larger than 1 and reasonably narrow")
parser.add_argument('-w', '--window', type=reasonableWindowType, metavar='WINDOW_LENGTH', help='length of the processing window (in pixels)')
parser.add_argument('--iter', type=int, choices=range(3, 100), metavar='MAX_ITERATIONS', help='max number of iterations')
parser.add_argument('-s', '--save-matrix', action='store_true', help='output the plot of the level energies as a PNG file instead of displaying it interactively')
parser.add_argument('wavfile', type=argparse.FileType('r'), nargs='?', help='name of the input wav file')
args = parser.parse_args()
# Global constants
# Length of the processing window over which we will extract the sparsity, shape and nonlinearity
# The default 4096 corresponds to 250ms at 16kHz sampling
WINDOW = args.window or 4096
# Length of the interval step for processing
# By default, the same as WINDOW, which makes non-overlapping windows (make them overlapping to see finer details)
INTERVAL = WINDOW
# Number-of-iterations limit used for the optimal-shape minimizer
# By default 10, which are not many, but probably enough for a no-gradients, 1D, smooth-landscape optimization
MAXITER = args.iter or 10
# Size of the dyadic multiresolution pyramid; a power of two
N = 32
# The anharmonic shape parameter r ranges from 0 to 1, with r=0 equivalent to standard trigonometry (psin -> sin, pcos -> cos)
# However, the numeric implementations do not converge at the extremes; use a tiny value in lieu of 0 and its 1-complement for 1
TINY = 1E-6
# Half to the half, or reciprocal of the Pythagoras' constant
recPytha = 1. / np.sqrt(2.)
# Function definitions
def log2int(x):
"Fast integer part of the base-2 logarithm; mostly useful for powers of two but works for any real nonzero input"
return np.frexp(x)[1] - 1
def normalization(array):
"Square root of the average square; a.k.a the L2-norm, when array values are the images of a function whose domain ranges (0, 1)"
return np.sqrt(np.mean(np.square(array)))
# There is ambiguity in the translational shift and this can induce horizontal biases in the projector.
# To solve this, we use the fftshift trick, to cancel the shift in time-frequency space (even if we do nothing with Fourier).
# With this, the duals retain time-domain localization with no shift bias.
def psin(n=N, r=TINY):
"Return an array with a period of the anharmonic sine at form r; very small for the trigonometric sine"
if not TINY <= r <= 1.-TINY:
raise ValueError('psin needs r well between 0 and 1')
psin = np.arctan(r * np.sin(2. * np.pi / n * np.arange(n)) / (1. - r * np.cos(2. * np.pi / n * np.arange(n))))
psin /= normalization(psin)
return np.fft.fftshift(psin)
def pcos(n=N, r=TINY):
"Return an array with a period of the anharmonic cosine at form r; very small for the trigonometric cosine"
if not TINY <= r <= 1.-TINY:
raise ValueError('pcos needs r well between 0 and 1')
pcos = -.5 * np.log(1. + r * r - 2. * r * np.cos(2. * np.pi / n * np.arange(n)))
pcos /= normalization(pcos)
return np.fft.fftshift(pcos)
# Use a Hanning window as the unit function. This way, our "pixel" representation or PSF is synced with the filtering.
# As a consequence, we can get rid of the border effects without altering the representation.
# The window has Linf normalization by default
# The advantage of using L2 this is that projector and representer are like valued:
def unit(n=N):
"Return an array with a unit function, consisting of a normalized Hanning window"
unit = np.hanning(n)
unit /= normalization(unit)
return unit
# We will be representing the frames and the unit function as column vectors, and we will combine them in a matrix:
def basis(n=N, r=TINY):
"Return a bidimensional array whose columns are the basis vectors"
frame = psin(n, r)
basis = unit(n)
# Now let us iteratively add the multiple resolutions:
halfN = n // 2
width = n
m = 1
while m < halfN: # Leave out the last level (1-pixel-long resolution), to avoid semiperiod cancellation with the psin
mass = np.copy(frame[::m]) * np.sqrt(m)
# Normalization (not necessary in practice):
# offset = np.mean(mass)
# mass -= offset
# weight = np.sqrt(np.sum(np.square(mass))/N) # L2
# weight = np.max(mass) # Linf
# mass /= weight
for l in range(m): # m times from 0 to m-1
preZ = l * width
postZ = n - width - preZ
atom = np.r_[np.zeros(preZ), mass, np.zeros(postZ)]
basis = np.c_[basis, atom]
width >>= 1
m <<= 1
# Correction for the last level
mass = np.empty(2)
mass[0] = +np.sqrt(n / 2.)
mass[1] = -np.sqrt(n / 2.)
for l in range(n // 2):
preZ = l * 2
postZ = n - 2 - preZ
atom = np.r_[np.zeros(preZ), mass, np.zeros(postZ)]
basis = np.c_[basis, atom]
return basis
# Consider that the dual elements (covectors), <A'_jk| are such that they resonate with their respective pairs
# and they cancel with any other, i.e., <A'_jk|A_lm> = δ_jl δ_km. Now, it is possible to extend the same requirement
# to the unit function so that <dual_i|basis_j> = δ_ij and then the duals get defined simply as a matrix inversion.
# The matrix basis is nonsingular by construction, as we canceled the mode 0 for the detail elements and used
# a positive unit function.
# Reciprocal to the unit function we have a constant function, because all the Hanusse atoms average-cancel at each period.
def getDual(n=N, r=TINY):
"Call the basis() procedure to get the basis column-vectors and get, through inversion, the dual line-covectors"
return np.linalg.inv(basis(n, r))
#plt.plot(np.transpose(getDual()[32:64,:])); plt.show()
#plt.plot(basis()[:,16:32]); plt.show()
def getAnalysis(n=N, r=TINY):
"For analysis purposes, normalize the dual"
dual = getDual(n, r)
for i in range(1, n):
normalCovector = dual[i]
normalCovector -= np.mean(normalCovector) # should already be close to 0 though
normalCovector /= normalization(normalCovector)
return dual
def project_old(signal, n=N, r=TINY):
"Window-filter a signal, project it through the covectors and return the projections (coefficients) as a bidimensional array"
window = unit(n)
dual = getDual(n, r)
length = signal.size - n
projections = np.empty([n, length])
for t in range(length):
filtdata = np.multiply(signal[t:t+n], window)
projections[:,t] = np.dot(dual, filtdata) # filtdata is 1D (not column) but nevertheless the result (also 1D) is correct
return projections
def project(signal, n=N, r=TINY, matsignal=None):
"Window-filter a signal, project it through the covectors and return the projections (coefficients) as a bidimensional array"
# Window-filter the rows of the dual matrix; this is equivalent but faster than doing it on the signal segments. which are of size n anyways
filtdual = np.multiply(getDual(n, r), unit(n)) # Dual matrix, filtered
# # # 1D window
# # 2D matrix with covectors as rows
# multiply rows element-wise
if matsignal is None:
length = signal.size - n
matsignal = np.empty([n, length])
for t in range(length):
matsignal[:,t] = signal[t:t+n]
return np.dot(filtdual, matsignal)
def lvlEnergy(signal, n=N, r=TINY, matsignal=None):
"For each set of coefficients in a same level, return its /energy/, or square root of the sum of squares"
projections = project(signal, n=n, r=r, matsignal=matsignal)
averages = projections[0] # take the first component, which should be the same as all the others
length = signal.size - n
levels = log2int(n)
lvlEnergy = np.empty([levels, length])
for j in range(levels):
lvlEnergy[j] = np.linalg.norm(projections[2**j:2*2**j], axis=0)
return averages, lvlEnergy
#thing = lvlEnergy(wav[1000:2256], n=256)[1]
#plt.matshow(thing, origin='upper', aspect='auto'); plt.colorbar(); plt.show()
def getSparsity(r, signal, n=N, matsignal=None):
"From the level energy vector, return the /sparsity/ of the representation defined as the 1-norm of this"
return np.average(np.fabs(lvlEnergy(signal, n=n, r=r, matsignal=matsignal)[1]))
# Processing starts here
if args.wavfile is None:
# The script was invoked without a filename; launch a GUI dialog to ask for one
from tkinter import Tk
from tkinter.filedialog import askopenfilename
Tk().withdraw()
in_filename = askopenfilename()
else:
in_filename = args.wavfile.name
wav = wavfile.read(in_filename)[1]
out_filename = in_filename.rpartition('.')[0] + '.txt'
out_figure = in_filename.rpartition('.')[0] + '.svg'
print("\nProcessing results outputted to file: " + out_filename + "\n")
out = open(out_filename, 'wt')
for t in range(0, wav.size-WINDOW-N, INTERVAL):
signal = wav[t:t+WINDOW+N].astype(float)
matsignal = np.empty([N, WINDOW])
for l in range(WINDOW):
matsignal[:,l] = signal[l:l+N]
fourierSparsity = getSparsity(TINY, signal, n=N, matsignal=matsignal)
result = optimize.minimize_scalar(getSparsity, args=(signal, N, matsignal), method='bounded', bounds=(TINY, 1.-TINY), options={'maxiter': MAXITER})
if fourierSparsity < result.fun:
sparsity = fourierSparsity
r = TINY
nonlin = 0.
else:
sparsity = result.fun
r = result.x
nonlin = np.log(fourierSparsity / sparsity)
sparsity /= np.average(np.fabs(signal))
out.write('{} {} {}\n'.format(sparsity, r, nonlin))
print('{:4.1f}%: {:5.1f} {:6.4f} {:6.4f}'.format(100.*t/wav.size, sparsity, r, nonlin))
out.close()
lvlEnergies = lvlEnergy(wav)[1]
plt.matshow(lvlEnergies, origin='upper', aspect='auto')
plt.colorbar()
if args.save_matrix:
plt.savefig(out_figure, dpi=2000)
print("\nPlot of level energies saved to file: " + out_figure)
#else:
# plt.show(block=True) # this shows a dialogue to save the figure too