Example #1
import numpy as np
from scipy.ndimage import shift

# Position, PositionalSource and the speed-of-sound constant C come from the
# surrounding module; C = 343.0 m/s is assumed here for completeness.
C = 343.0


class VirtualStereoMic(object):
    def __init__(self, radius=0.1):
        # Two virtual capsules on the y-axis, `radius` metres to the left and
        # right of the origin.
        self.radius = radius
        self.lmic_pos = Position(0, radius, 0, 'cartesian')
        self.rmic_pos = Position(0, -radius, 0, 'cartesian')

    def binauralize(self, sources):
        # Render one or more positional sources into a two-channel (L, R) signal.
        if isinstance(sources, PositionalSource):
            sources = [sources]

        l_signal, r_signal = 0., 0.
        for src in sources:
            l_dist = np.sqrt(((src.position.coords('cartesian') - self.lmic_pos.coords('cartesian'))**2).sum())
            r_dist = np.sqrt(((src.position.coords('cartesian') - self.rmic_pos.coords('cartesian'))**2).sum())

            # Propagation delay from the source to each capsule, in samples.
            l_delay, r_delay = int(l_dist / C * src.sample_rate), int(r_dist / C * src.sample_rate)

            # Attenuation is frequency dependent, but let's simplify to a
            # 1 / (1 + distance) gain.
            l_attn, r_attn = 1 / (1. + l_dist), 1 / (1. + r_dist)

            # Delay, attenuate and average each source into the two channels.
            l_signal += l_attn * shift(src.signal, l_delay, cval=0.) / len(sources)
            r_signal += r_attn * shift(src.signal, r_delay, cval=0.) / len(sources)

        return np.stack((l_signal, r_signal), axis=1)

    def binauralize_frame(self, sources, output, frame_no):
        # Same as binauralize, but fills a single frame of a pre-allocated
        # (n_frames, 2) output buffer in place.
        if isinstance(sources, PositionalSource):
            sources = [sources]

        for src in sources:
            l_dist = np.sqrt(((src.position.coords('cartesian') - self.lmic_pos.coords('cartesian'))**2).sum())
            r_dist = np.sqrt(((src.position.coords('cartesian') - self.rmic_pos.coords('cartesian'))**2).sum())

            # Propagation delay from the source to each capsule, in samples.
            l_delay, r_delay = int(l_dist / C * src.sample_rate), int(r_dist / C * src.sample_rate)

            # Attenuation is frequency dependent, but let's simplify to a
            # 1 / (1 + distance) gain.
            l_attn, r_attn = 1 / (1. + l_dist), 1 / (1. + r_dist)

            # Only emit a sample once the delayed signal has reached the capsule.
            if frame_no - l_delay >= 0:
                output[frame_no, 0] += l_attn * src.signal[frame_no - l_delay] / len(sources)
            if frame_no - r_delay >= 0:
                output[frame_no, 1] += r_attn * src.signal[frame_no - r_delay] / len(sources)
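
A minimal usage sketch (not part of the original source): the PositionalSource constructor below is hypothetical; only its .signal, .position and .sample_rate attributes are known from the code above.

# Hypothetical usage sketch. PositionalSource(signal, position, sample_rate)
# is an assumed constructor matching the attributes used above.
fs = 16000
t = np.arange(fs) / fs
tone = np.sin(2 * np.pi * 440 * t)  # one second of a 440 Hz test tone
src = PositionalSource(tone, Position(2., 1., 0., 'cartesian'), fs)

mic = VirtualStereoMic(radius=0.1)
stereo = mic.binauralize(src)       # shape (fs, 2)

out = np.zeros((len(tone), 2))      # frame-by-frame variant
for n in range(len(tone)):
    mic.binauralize_frame(src, out, n)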
Example #2
import math
from math import pi

import numpy as np

# Position and project_audio come from the surrounding module.


def audio_crop_freq_sep(ambix, center, sigma=9, thr=0.7):
    import librosa
    from scipy import ndimage

    # Smoothed magnitude STFT of the audio projected towards the target direction.
    center_stft = librosa.core.stft(project_audio(ambix, center=center)[0])
    center_stft_smooth = ndimage.gaussian_filter(np.abs(center_stft), sigma=sigma)

    # Smoothed magnitude STFTs for a grid of other directions on the sphere,
    # skipping directions within 90 degrees of the target direction.
    other_stft_smooth = []
    for phi in np.linspace(-pi, pi, 16):
        for nu in np.linspace(-pi / 2, pi / 2, 8):
            p = Position(phi, nu, 1., 'polar')
            if (p.coords('cartesian') * center.coords('cartesian')).sum() > math.cos(pi / 2):
                continue
            stft = librosa.core.stft(project_audio(ambix, center=p)[0])
            other_stft_smooth += [ndimage.gaussian_filter(np.abs(stft), sigma=sigma)]

    # For each time-frequency bin, the fraction of other directions whose
    # (smoothed) energy is below that of the target direction.
    other_stft_smooth = np.stack(other_stft_smooth, 0)
    rank = (np.abs(center_stft_smooth[np.newaxis]) > np.abs(other_stft_smooth)).sum(0) / other_stft_smooth.shape[0]

    # Keep only bins dominated by the target direction, then resynthesize.
    stft = librosa.core.stft(project_audio(ambix, center=center)[0])
    stft[rank < thr] = 0
    wav = librosa.core.istft(stft)
    return wav
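
A hedged usage sketch, assuming ambix is a multi-channel ambisonic signal array loaded elsewhere and that project_audio returns a sequence whose first element is a mono projection, as implied by the [0] indexing above:

# Hypothetical usage: isolate sound arriving from straight ahead
# (phi = 0, nu = 0). `ambix` is assumed to be loaded elsewhere.
front = Position(0., 0., 1., 'polar')
isolated = audio_crop_freq_sep(ambix, front, sigma=9, thr=0.7)

The thr parameter controls how dominant the target direction must be: a time-frequency bin is kept only if the target direction's smoothed energy exceeds that of at least a thr fraction of the sampled competing directions.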