-
Notifications
You must be signed in to change notification settings - Fork 1
/
spikeGramTest.py
140 lines (126 loc) · 6.02 KB
/
spikeGramTest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# spikeGramTest.py
# A test implementation of spike gram coding using matching pursuit
#
# Created by Colin Raffel on 10/30/12
import numpy as np
import matplotlib.pyplot as plt
import utility
import sys
import os
import optparse
import scipy.signal
import multiprocessing as mp
import time
import ERBFilters
import fastAbsArgMax
# Encodes an input signal "x" with elements from "dictionary
def matchingPursuit( dictionary, x, SNRThreshold=20, scaleThreshold=0, maxIterations=50000 ):
    """Greedily encode the signal x as scaled, shifted kernels from dictionary.

    Parameters:
        dictionary - dict mapping integer kernel index -> 1-d np.ndarray kernel
                     (assumed unit-norm so a correlation value is the atom scale)
        x - 1-d np.ndarray signal to encode; must be longer than every kernel
        SNRThreshold - stop once the residual SNR (in dB) reaches this value
        scaleThreshold - stop once the best atom's |scale| falls below this
        maxIterations - hard cap on the number of atoms extracted

    Returns (returnSignal, residual, scales, kernels, offsets) where
    returnSignal is the reconstruction and residual the remaining error (both
    the same length as x), and scales/kernels/offsets give each extracted
    atom's scale, dictionary index, and sample offset.
    """
    # Find the biggest kernel so we know how much zero padding is needed
    biggestKernelSize = 0
    for n in dictionary:
        if dictionary[n].shape[0] > biggestKernelSize:
            biggestKernelSize = dictionary[n].shape[0]
    # Residual of the encoding process; zero-pad both ends so kernels can
    # overlap the signal boundaries without index errors
    residual = np.append( np.append( np.zeros( biggestKernelSize ), x ), np.zeros( biggestKernelSize ) )
    # Number of kernels
    nKernels = len( dictionary )
    # Make sure the (padded) input signal is longer than all kernels
    for n in np.arange( nKernels ):
        assert dictionary[n].shape[0] < residual.shape[0]
    # Cross-correlation of each kernel against the current residual
    correlations = np.zeros( (nKernels, residual.shape[0]) )
    # Keep track of the iterations
    currentIteration = 0
    # Per-atom parameters; integer dtype for kernels/offsets because they are
    # used as dict keys and slice indices (float arrays would fail there)
    scales = np.zeros( maxIterations )
    kernels = np.zeros( maxIterations, dtype=int )
    offsets = np.zeros( maxIterations, dtype=int )
    # For reporting how long each iteration takes
    lastTime = time.time()
    # For checking whether we've converged
    currentResidualSNR = -np.inf
    signalNorm = np.dot( x, x )
    # Iterate until the residual SNR reaches the requested threshold
    while currentResidualSNR < SNRThreshold:
        if currentIteration == 0:
            # First iteration: correlate every kernel against the whole residual
            for n in range( nKernels ):
                correlations[n] = scipy.signal.fftconvolve( residual, dictionary[n][::-1], 'same' )
        else:
            # Subsequent iterations: only re-correlate where the residual changed
            changeStart = offsets[currentIteration - 1]
            # The change ends where the previously subtracted kernel ends
            changeEnd = changeStart + dictionary[kernels[currentIteration - 1]].shape[0]
            for n in range( nKernels ):
                # What's the size of the current kernel to be correlated?
                kernelSize = dictionary[n].shape[0]
                # Start early enough that the recomputed window has no edge effects
                correlationStart = changeStart - 2*kernelSize
                # This should never happen, but just in case
                if correlationStart < 0:
                    correlationStart = 0
                # Correlate with no edge effects
                correlation = scipy.signal.fftconvolve( residual[correlationStart:changeEnd + kernelSize - 1], dictionary[n][::-1], 'valid' )
                # [:correlation.shape[0]] is an indexing hack to avoid shape mismatches
                correlations[n, correlationStart + kernelSize//2:changeEnd + kernelSize//2][:correlation.shape[0]] = correlation
        # Find the (kernel, offset) with the largest-magnitude correlation.
        # Prefer the project's optimized helper when available; otherwise fall
        # back to the equivalent numpy computation.
        try:
            bestKernelAndOffset = fastAbsArgMax.fastAbsArgMax( correlations )
        except NameError:
            bestKernelAndOffset = np.unravel_index( np.argmax( np.abs( correlations ) ), correlations.shape )
        kernels[currentIteration] = bestKernelAndOffset[0]
        offsets[currentIteration] = bestKernelAndOffset[1]
        # The correlation value is the atom scale (kernels assumed unit-norm)
        scales[currentIteration] = correlations[kernels[currentIteration], offsets[currentIteration]]
        # Get the kernel that turned out to be the best
        kernel = dictionary[kernels[currentIteration]]*scales[currentIteration]
        # The actual offset is the 'same'-mode correlation index - kernel size//2
        offsets[currentIteration] -= kernel.shape[0]//2
        # Subtract the shifted, scaled kernel to get the new residual
        residual[offsets[currentIteration]:offsets[currentIteration] + kernel.shape[0]] -= kernel
        currentResidualSNR = 10*np.log10( signalNorm/np.dot( residual[biggestKernelSize:-biggestKernelSize], residual[biggestKernelSize:-biggestKernelSize] ) )
        print( "Iteration {}, time = {:.3f}, SNR = {:.3f}".format( currentIteration, time.time() - lastTime, currentResidualSNR ) )
        lastTime = time.time()
        # Give up once atoms become negligibly small
        if np.abs( scales[currentIteration] ) < scaleThreshold:
            break
        # Next iteration...
        currentIteration += 1
        if currentIteration > maxIterations - 1:
            break
    # Trim return arrays to the number of atoms actually kept
    scales = scales[:currentIteration]
    kernels = kernels[:currentIteration]
    offsets = offsets[:currentIteration]
    # Initialize return signal and sum the extracted atoms back in
    returnSignal = np.zeros( residual.shape[0] )
    for n in np.arange( currentIteration ):
        kernel = dictionary[kernels[n]]
        returnSignal[offsets[n]:offsets[n] + kernel.shape[0]] += scales[n]*kernel
    # Trim the zero padding from BOTH ends (the original [B:-1] trim left
    # biggestKernelSize - 1 padding samples at the end); this matches the
    # residual window used for the SNR computation above
    returnSignal = returnSignal[biggestKernelSize:-biggestKernelSize]
    residual = residual[biggestKernelSize:-biggestKernelSize]
    return returnSignal, residual, scales, kernels, offsets
if __name__ == "__main__":
    # Expect exactly one argument: the audio file to encode
    if len( sys.argv ) < 2:
        print( "Usage: %s audio.wav" % sys.argv[0] )
        # Exit nonzero so callers/scripts can detect the usage error
        sys.exit( 1 )
    audioData, fs = utility.getAudioData( sys.argv[1] )
    # Create an impulse from which the ERB filterbank kernels are derived
    impulse = np.zeros( 10000 )
    impulse[0] = 1.0
    kernelDictionary = ERBFilters.ERBFiltersToKernels( impulse, ERBFilters.makeERBFilters( fs, 200, 100 ) )
    # Encode: 16 dB target SNR, no scale threshold, up to 1,000,000 iterations
    reconstructedSignal, residual, scales, kernels, offsets = matchingPursuit( kernelDictionary, audioData, 16, 0, 1000000 )
    basename = os.path.splitext( sys.argv[1] )[0]
    # Write out the reconstructed audio...
    utility.writeWav( reconstructedSignal, fs, basename + "Reconstructed.wav" )
    # ...and the encoding itself as an (nAtoms, 3) array of (scale, kernel, offset)
    scalesKernelsAndOffsets = np.column_stack( (scales, kernels, offsets) )
    np.save( basename + 'ReconstructedScalesKernelsAndOffsets.npy', scalesKernelsAndOffsets )