def test_source_binauralizer():
    """Listening test for SourceBinauralizer with a static and a moving source."""
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.iolib.position import read_position_file

    def _audition(signal, sr):
        # Normalize to full scale, play through sox, then clean up.
        save_wav('/tmp/output.wav', signal / np.abs(signal).max(), sr)
        os.system('play /tmp/output.wav')
        os.remove('/tmp/output.wav')

    # binauralizer = SourceBinauralizer(use_hrtfs=True, cipic_dir='hrtfs/cipic_subj3')
    binauralizer = SourceBinauralizer(use_hrtfs=False)

    # Static source
    sample = 'wav_test/gen_synthetic-S1'
    positions, wav_fns, _, sample_ids = read_position_file(sample + '-position.txt')
    mono, rate = load_wav(wav_fns[sample_ids[0]])
    src = PositionalSource(mono[:, 0], positions[sample_ids[0]][0], rate)
    _audition(binauralizer.binauralize([src]), rate)

    # Moving source
    sample = 'wav_test/gen_synthetic-M1'
    positions, wav_fns, _, sample_ids = read_position_file(sample + '-position.txt')
    mono, rate = load_wav(wav_fns[sample_ids[0]])
    src = MovingSource(mono[:, 0], positions[sample_ids[0]], rate)
    stereo = np.zeros((mono.shape[0], 2))
    while src.tic():
        binauralizer.binauralize_frame([src], stereo, src.cur_idx)
    _audition(stereo, rate)
def test_ambix_emd():
    """Sanity-check ambix_emd: same clip vs. itself, then two different clips."""
    from pyutils.iolib.audio import load_wav
    # Run from project home (spatialaudiogen/)

    # Load two first-order ambisonics clips at a common rate.
    rate = 24000
    fn_moving = 'data/wav_test/hello-left2right-ambix.wav'
    fn_static = 'data/wav_test/hello-statright-ambix.wav'
    ambi1, _ = load_wav(fn_moving, rate=rate)
    ambi2, _ = load_wav(fn_static, rate=rate)

    print('Same FOA: EMD =', ambix_emd(ambi1, ambi1, rate))
    print('Diff FOA: EMD =', ambix_emd(ambi1, ambi2, rate))
def run(input_fn, position_fn, ambi_order, output_fn):
    """Encode a (possibly moving) mono source into ambisonics, then render stereo.

    The position file holds one 'phi nu r' polar triplet per line; a single
    line means a stationary source, several lines a moving one.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn(
            'Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    fmt = AmbiFormat(ambi_order=ambi_order, sample_rate=rate)
    encoder = AmbiEncoder(fmt)

    with open(position_fn, 'r') as f:
        coords = [np.array([float(v) for v in line.strip().split()]) for line in f]
    positions = [Position(c[0], c[1], c[2], 'polar') for c in coords]

    if len(positions) == 1:
        # Stationary source
        source = PositionalSource(mono, positions[0], rate)
        ambi = encoder.encode(source)
    else:
        # Moving source: encode frame by frame as the position advances.
        source = MovingSource(mono, positions, rate)
        ambi = AmbisonicArray(np.zeros((mono.shape[0], fmt.num_channels)), fmt)
        while source.tic():
            encoder.encode_frame(source, ambi, source.cur_idx)

    binauralizer = DirectAmbisonicBinauralizer(fmt, method='projection')
    # binauralizer = AmbisonicBinauralizer(fmt, method='projection', use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    stereo = binauralizer.binauralize(ambi.data)
    save_wav(output_fn, stereo, rate)
def run(input_fn, output_fn, position_fn='', angular_res=''):
    """Render a spherical sound-energy heatmap video from an ambisonics wav.

    If position_fn is given, the ground-truth source position map is
    super-imposed on each frame before colormapping.
    """
    data, rate = load_wav(input_fn)
    duration = data.shape[0] / float(rate)

    energy_vis = SphericalAmbisonicsVisualizer(data, rate, angular_res=angular_res)
    gt_vis = None
    if position_fn:
        gt_vis = SphericalSourceVisualizer(position_fn, duration,
                                           energy_vis.visualization_rate(),
                                           angular_res=angular_res)

    writer = VideoWriter(output_fn,
                         video_fps=energy_vis.visualization_rate(),
                         width=energy_vis.frame_dims[1],
                         height=energy_vis.frame_dims[0],
                         rgb=True)
    cmap = np.stack(plt.get_cmap('inferno').colors)

    while True:
        frame = energy_vis.get_next_frame()
        if frame is None:
            break
        frame /= frame.max()

        # Super-impose ground-truth position map, if available.
        if gt_vis is not None:
            frame += gt_vis.get_next_frame()

        # Re-normalize, quantize to 8 bits and apply the colormap.
        frame = ((frame / frame.max()) * 255).astype(np.uint8)
        frame = (cmap[frame] * 255).astype(np.uint8)
        writer.write_frame(frame)
def test_virtual_mic():
    """Listening test for VirtualStereoMic with static and moving piano sources."""
    from pyutils.iolib.audio import load_wav, save_wav

    def _audition(signal, sr):
        # Write, play through sox, and delete the temporary file.
        save_wav('/tmp/output.wav', signal, sr)
        os.system('play /tmp/output.wav')
        os.remove('/tmp/output.wav')

    def _read_positions(fn):
        # One 'phi nu r' polar triplet per line.
        with open(fn, 'r') as f:
            rows = [[float(v) for v in line.strip().split()] for line in f]
        return [Position(r[0], r[1], r[2], 'polar') for r in rows]

    mic = VirtualStereoMic()
    mono, rate = load_wav('wav_test/piano.wav')
    mono = mono[:, 0]

    # Static source
    positions = _read_positions('wav_test/piano_stat_position.txt')
    source = PositionalSource(mono, positions[0], rate)
    _audition(mic.binauralize([source]), rate)

    # Moving source
    positions = _read_positions('wav_test/piano_mov_position.txt')
    source = MovingSource(mono, positions, rate)
    stereo = np.zeros((mono.shape[0], 2))
    while source.tic():
        mic.binauralize_frame([source], stereo, source.cur_idx)
    _audition(stereo, rate)
def __init__(self, dirname):
    """Load an HRIR database from *dirname* and index it for nearest lookup.

    Reads one stereo-pair of wav files per azimuth ('<az>az{left,right}.wav',
    with a 'neg' prefix for negative angles), each holding 200-tap impulse
    responses with elevation along the channel axis. Every (azimuth,
    elevation) grid point is converted to a cartesian Position and the unit
    direction vectors are indexed in a KDTree for nearest-neighbor HRIR
    retrieval.

    Args:
        dirname: Directory containing the per-azimuth HRIR wav files.
    """
    # Measurement grid in degrees. Elevations sweep from below the front
    # (-45) over the top and down behind (231); azimuths span -80..80.
    elevation = np.array([
        -45, -39, -34, -28, -23, -17, -11, -6, 0, 6, 11, 17, 23, 28, 34, 39,
        45, 51, 56, 62, 68, 73, 79, 84, 90, 96, 101, 107, 113, 118, 124, 129,
        135, 141, 146, 152, 158, 163, 169, 174, 180, 186, 191, 197, 203, 208,
        214, 219, 225, 231
    ])
    azimuth = np.array([
        -80, -65, -55, -45, -35, -30, -25, -20, -15, -10, -5, 0, 5, 10, 15,
        20, 25, 30, 35, 45, 55, 65, 80
    ])

    # HRIRs stored as (taps, azimuth, elevation); 200 taps per response.
    self.right_hrir = np.zeros((200, len(azimuth), len(elevation)))
    self.left_hrir = np.zeros((200, len(azimuth), len(elevation)))
    for i, phi in enumerate(azimuth):
        right_fn = ('neg' if phi < 0 else '') + str(abs(phi)) + 'azright.wav'
        left_fn = ('neg' if phi < 0 else '') + str(abs(phi)) + 'azleft.wav'
        # NOTE(review): responses are time-reversed on load (np.flip along
        # the tap axis) — presumably so they can be applied by correlation
        # instead of convolution; confirm against the rendering code.
        self.right_hrir[:, i, :] = np.flip(load_wav(
            os.path.join(dirname, right_fn))[0], axis=0)
        self.left_hrir[:, i, :] = np.flip(load_wav(
            os.path.join(dirname, left_fn))[0], axis=0)

    # Place every measurement on a sphere of fixed radius and pair each
    # position with its (left, right) HRIRs.
    radius = 3.
    self.hrir_db = []
    for i, az in enumerate(azimuth):
        for j, elev in enumerate(elevation):
            xp = radius * cos(elev * pi / 180.) * sin(az * pi / 180.)
            yp = radius * cos(elev * pi / 180.) * cos(az * pi / 180.)
            zp = radius * sin(elev * pi / 180.)
            # Axis swap (x <- yp, y <- -xp): maps the measurement frame into
            # the project's coordinate convention — verify against Position.
            x, y, z = yp, -xp, zp
            # x, y, z = xp, yp, zp
            p = Position(x, y, z, 'cartesian')
            self.hrir_db.append(
                (p, self.left_hrir[:, i, j], self.right_hrir[:, i, j]))

    # KDTree over unit direction vectors (positions normalized to length 1),
    # so queries match by direction regardless of source distance.
    self.kdt = KDTree(np.array([
        hrir[0].coords('cartesian') /
        np.linalg.norm(hrir[0].coords('cartesian'))
        for hrir in self.hrir_db
    ]), leaf_size=2, metric='euclidean')
def run(input_fn, x, y, z, ambi_order, output_fn):
    """Encode a static mono source at cartesian (x, y, z) into ambisonics.

    Args:
        input_fn: Input wav path; a mono source is expected (extra channels
            are dropped with a warning).
        x, y, z: Cartesian source position.
        ambi_order: Ambisonics order of the encoding.
        output_fn: Output path for the ambisonics wav.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        # Typo fixed ('nor' -> 'not'); now consistent with the other run() helpers.
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    encoder = AmbiEncoder(AmbiFormat(ambi_order=ambi_order, sample_rate=rate))
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    ambi = encoder.encode(source)
    save_wav(output_fn, ambi.data, rate)
def test_moving_source():
    """Step a MovingSource through its trajectory, printing each polar position."""
    from pyutils.iolib.audio import load_wav

    mono, rate = load_wav('wav_test/piano.wav')
    mono = mono[:, 0]

    # One 'phi nu r' polar triplet per line of the trajectory file.
    position_fn = 'wav_test/piano_mov_position.txt'
    with open(position_fn, 'r') as f:
        rows = [np.array([float(v) for v in line.strip().split()]) for line in f]
    waypoints = [Position(r[0], r[1], r[2], 'polar') for r in rows]

    source = MovingSource(mono, waypoints, rate)
    while source.tic():
        source.position.print_position('polar')
def run(input_fn, x, y, z, output_fn, use_hrtfs, hrtf_dir):
    """Binauralize a static mono source located at cartesian (x, y, z).

    Args:
        input_fn: Input wav path; a mono source is expected (extra channels
            are dropped with a warning).
        x, y, z: Cartesian source position.
        output_fn: Output path for the stereo wav.
        use_hrtfs: Whether to render with HRTFs.
        hrtf_dir: Directory with the CIPIC HRTF database.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        # Typo fixed ('nor' -> 'not'); now consistent with the other run() helpers.
        warnings.warn(
            'Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    binauralizer = SourceBinauralizer(use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    # binauralize() takes a list of sources (cf. test_source_binauralizer /
    # test_virtual_mic); the bare source passed here looked like a bug.
    stereo = binauralizer.binauralize([source])
    save_wav(output_fn, stereo, rate)
def run(input_fn, output_fn, overwrite=False):
    """Decode an ambix wav to stereo using pseudoinverse binauralization.

    Args:
        input_fn: Input ambisonics wav; the order is inferred from the
            channel count ((order + 1)^2 channels for a full-order stream).
        output_fn: Output stereo wav path.
        overwrite: When True, delete a pre-existing output file first.

    Raises:
        FileExistsError: If output_fn exists and overwrite is False.
    """
    if overwrite and os.path.exists(output_fn):
        os.remove(output_fn)
    if os.path.exists(output_fn):
        # Explicit error instead of `assert`: asserts are stripped under -O.
        raise FileExistsError('Output file already exists: ' + output_fn)

    data, rate = load_wav(input_fn)
    # Infer ambisonics order from the channel count.
    ambi_order = int(np.sqrt(data.shape[1]) - 1)
    fmt = AmbiFormat(ambi_order=ambi_order, sample_rate=rate)

    binauralizer = DirectAmbisonicBinauralizer(fmt, method='pseudoinv')
    # binauralizer = AmbisonicBinauralizer(fmt, method='projection', use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    stereo = binauralizer.binauralize(data)
    save_wav(output_fn, stereo, rate)
def run(input_fn, x, y, z, ambi_order, output_fn):
    """Encode a static mono source to ambisonics, then render it to stereo.

    Args:
        input_fn: Input wav path; a mono source is expected (extra channels
            are dropped with a warning).
        x, y, z: Cartesian source position.
        ambi_order: Ambisonics order of the intermediate encoding.
        output_fn: Output path for the stereo wav.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        # Typo fixed ('nor' -> 'not'); now consistent with the other run() helpers.
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    encoder = AmbiEncoder(AmbiFormat(ambi_order=ambi_order, sample_rate=rate))
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    ambi = encoder.encode(source)

    binauralizer = DirectAmbisonicBinauralizer(ambi.format, method='projection')
    # binauralizer = AmbisonicBinauralizer(ambi.format, method='projection', use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    stereo = binauralizer.binauralize(ambi.data)
    save_wav(output_fn, stereo, rate)
def test_ambisonics_binauralizer():
    """Listening test for DirectAmbisonicBinauralizer on synthetic ambix samples."""
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.ambisonics.common import AmbiFormat

    def _audition(signal, sr):
        # Normalize to full scale, play through sox, then clean up.
        save_wav('/tmp/output.wav', signal / np.abs(signal).max(), sr)
        os.system('play /tmp/output.wav')
        os.remove('/tmp/output.wav')

    # The static sample fixes the decoder format (first order, its rate).
    ambi, rate = load_wav('wav_test/gen_synthetic-S1' + '-ambix.wav')
    fmt = AmbiFormat(1, rate)
    binauralizer = DirectAmbisonicBinauralizer(fmt, method='pseudoinv')
    # binauralizer = AmbisonicBinauralizer(fmt, method='projection', use_hrtfs=True, cipic_dir='hrtfs/cipic_subj3')
    _audition(binauralizer.binauralize(ambi), rate)

    # Moving sample, decoded with the same binauralizer.
    ambi, rate = load_wav('wav_test/gen_synthetic-M1' + '-ambix.wav')
    _audition(binauralizer.binauralize(ambi), rate)
def run(input_fn, position_fn, output_fn, use_hrtfs, hrtf_dir):
    """Binauralize a mono source following the trajectory in position_fn.

    The position file holds one 'phi nu r' polar triplet per line; a single
    line means a stationary source, several lines a moving one.

    Args:
        input_fn: Input wav path; a mono source is expected (extra channels
            are dropped with a warning).
        position_fn: Text file with polar positions, one per line.
        output_fn: Output path for the stereo wav.
        use_hrtfs: Whether to render with HRTFs.
        hrtf_dir: Directory with the CIPIC HRTF database.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    positions = [[float(num) for num in l.strip().split()]
                 for l in open(position_fn, 'r')]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]

    binauralizer = SourceBinauralizer(use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    if len(positions) == 1:
        # Stationary source. binauralize()/binauralize_frame() take a list of
        # sources (cf. test_source_binauralizer / test_virtual_mic); the bare
        # source previously passed here looked like a bug.
        source = PositionalSource(mono, positions[0], rate)
        stereo = binauralizer.binauralize([source])
    else:
        source = MovingSource(mono, positions, rate)
        stereo = np.zeros((mono.shape[0], 2))
        while source.tic():
            binauralizer.binauralize_frame([source], stereo, source.cur_idx)
    save_wav(output_fn, stereo, rate)
def get(self, start_time, size, rotation=None):
    """Return `size` audio frames starting at `start_time` seconds.

    Reads from sequentially numbered wav chunks ('%06d.wav') under
    self.audio_folder, zero-padding when the requested window starts before
    0 or runs past self.num_frames, and optionally rotating the channels
    about the vertical axis.

    Args:
        start_time: Window start in seconds; may be negative (zero-padded).
        size: Number of frames to return.
        rotation: Optional horizontal rotation in radians, in [-pi, pi).

    Returns:
        Array of shape (size, self.num_channels).
    """
    # Check if padding is necessary
    start_frame = int(start_time * self.rate)
    pad_before, pad_after = 0, 0
    if start_frame < 0:
        # Window starts before the recording: pad the head, clamp to 0.
        pad_before = abs(start_frame)
        size -= pad_before
        start_time, start_frame = 0., 0
    if start_frame + size > self.num_frames:
        # Window runs past the end: pad the tail.
        pad_after = start_frame + size - self.num_frames
        size -= pad_after

    # Load audio
    # NOTE(review): file indices are whole seconds — this assumes one wav
    # file per second of audio; verify against the writer side.
    index = range(int(start_time),
                  min(int(np.ceil(start_time + size / float(self.rate))),
                      self.num_files))
    fns = ['{}/{:06d}.wav'.format(self.audio_folder, i) for i in index]
    chunk = [load_wav(fn, self.rate)[0] for fn in fns]
    chunk = np.concatenate(chunk, axis=0) if len(chunk) > 1 else chunk[0]
    # Drop the sub-second offset at the head and clip to the channel count.
    ss = int((start_time - int(start_time)) * self.rate)
    chunk = chunk[ss:ss + size, :self.num_channels]

    # Pad
    if pad_before > 0:
        pad = np.zeros((pad_before, self.num_channels))
        chunk = np.concatenate((pad, chunk), axis=0)
    if pad_after > 0:
        pad = np.zeros((pad_after, self.num_channels))
        chunk = np.concatenate((chunk, pad), axis=0)

    # Apply rotation
    if rotation is not None:
        assert -np.pi <= rotation < np.pi
        c = np.cos(rotation)
        s = np.sin(rotation)
        # Rotation about the vertical axis; channel order is (W, Y, Z, X)
        # per the inline labels, so only the Y and X rows mix.
        rot_mtx = np.array([[1, 0, 0, 0],    # W' = W
                            [0, c, 0, s],    # Y' = X sin + Y cos
                            [0, 0, 1, 0],    # Z' = Z
                            [0, -s, 0, c]])  # X' = X cos - Y sin
        chunk = np.dot(chunk, rot_mtx.T)

    return chunk
def test_emd():
    """Print the EMD between ambisonics energy maps and ground-truth source maps."""
    from pyutils.iolib.audio import load_wav

    # Load ambisonics
    ang_res = 10
    sample = 'wav_test/gen_synthetic-M1'
    data, rate = load_wav(sample + '-ambix.wav')
    duration = data.shape[0] / float(rate)

    ambiVis = SphericalAmbisonicsVisualizer(data, rate, window=0.1,
                                            angular_res=ang_res)
    # vid_reader = VideoReader(sample+'.avi', ambiVis.visualization_rate(),
    #                          image_preprocessing=lambda x: resize(rgb2gray(x), ambiVis.phi_mesh.shape))
    srcVis = SphericalSourceVisualizer(sample + '-position.txt', duration,
                                       rate=ambiVis.visualization_rate(),
                                       angular_res=ang_res)

    # FIX: `izip` is Python 2 only (itertools.izip was removed in Python 3);
    # the builtin `zip` is already lazy in Python 3 and behaves identically.
    for rms, frame in zip(ambiVis.loop_frames(), srcVis.loop_frames()):
        print(emd(rms, frame, ambiVis.phi_mesh, ambiVis.nu_mesh))
def gen_360video(audio_fn, video_fn, output_fn, inject_meta=False,
                 overlay_map=False, binauralize=False, no_spatial_audio=False):
    """Assemble a 360 video by muxing spatial audio with equirectangular video.

    Pipeline: split audio/video with ffmpeg, optionally overlay a spherical
    sound-energy map on the frames, optionally fold the ambisonics down to
    stereo, re-mux, and optionally inject 360/spatial-audio metadata via the
    bundled spatial-media tool.

    Args:
        audio_fn: Input file whose audio track (ambisonics) is extracted.
        video_fn: Input file whose video track is extracted (copied, not
            re-encoded).
        output_fn: Final output path (made absolute relative to the cwd).
        inject_meta: Inject 360 metadata using the spatial-media tool.
        overlay_map: Overlay a colormapped sound-energy heatmap on the video.
        binauralize: Replace the ambisonics track with a simple stereo
            fold-down before muxing.
        no_spatial_audio: When injecting metadata, omit the spatial-audio flag.
    """
    from pyutils.iolib.video import VideoReader, VideoWriter
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.ambisonics.distance import SphericalAmbisonicsVisualizer
    import tempfile
    from matplotlib import pyplot as plt
    from skimage.transform import resize

    # NOTE(review): tempfile.mktemp is deprecated/race-prone; the names are
    # only handed to external ffmpeg processes here, but consider
    # NamedTemporaryFile(delete=False) if this is hardened.
    tmp_file = tempfile.mktemp(dir='/tmp/', suffix='.mp4')
    tmp_snd_file = tempfile.mktemp(dir='/tmp/', suffix='.wav')
    tmp_vid_file = tempfile.mktemp(dir='/tmp/', suffix='.mp4')

    # Split the inputs into an audio-only wav and a video-only mp4.
    # NOTE(review): os.system with interpolated filenames — unsafe if the
    # paths can contain shell metacharacters; fine for trusted local use.
    print('Splitting')
    cmd = 'ffmpeg -i {} -vn -strict -2 {}'.format(audio_fn, tmp_snd_file)
    print(cmd)
    os.system(cmd)
    cmd = 'ffmpeg -i {} -an -vcodec copy {}'.format(video_fn, tmp_vid_file)
    print(cmd)
    os.system(cmd)

    if overlay_map:
        print('Overlaying spherical map')
        tmp_vid_file2 = tempfile.mktemp(dir='/tmp/', suffix='.mp4')
        ambix, snd_rate = load_wav(tmp_snd_file)
        reader = VideoReader(tmp_vid_file, rate=10)
        writer = VideoWriter(tmp_vid_file2, reader.fps)
        # Audio is subsampled 5x, and energy frames are produced at 1/5 of
        # the video rate; each energy frame is interpolated over 5 video
        # frames below.
        ambiVis = SphericalAmbisonicsVisualizer(ambix[::5], snd_rate / 5.,
                                                5. / reader.fps, 5.)
        cmap = plt.cm.YlOrRd(np.linspace(0, 1, 256))[:, :3]
        cur_rms = ambiVis.get_next_frame()
        # Per-frame min-max normalization (epsilon avoids divide-by-zero).
        cur_rms = (cur_rms - cur_rms.min()) / (cur_rms.max() - cur_rms.min() + 0.005)
        while True:
            prev_rms = cur_rms
            cur_rms = ambiVis.get_next_frame()
            if cur_rms is None:
                break
            cur_rms = (cur_rms - cur_rms.min()) / (cur_rms.max() - cur_rms.min() + 0.005)
            for i in range(5):
                frame = reader.get()
                if frame is None:
                    break
                # Linear blend between consecutive energy maps.
                beta = i / 5.
                rms = (1 - beta) * prev_rms + beta * cur_rms
                # Contrast stretch: emphasize strong energy, zero out weak.
                rms = rms * 2. - 0.7
                rms[rms < 0] = 0
                dir_map = (rms * 255).astype(int)
                dir_map[dir_map > 255] = 255
                dir_map = resize(cmap[dir_map], reader.frame_shape[:2]) * 255
                # Alpha-blend the colormapped heat map over the video frame.
                alpha = resize(rms[:, :, np.newaxis], reader.frame_shape[:2]) * 0.6
                overlay = alpha * dir_map + (1 - alpha) * frame
                writer.write_frame(overlay.astype(np.uint8))
        del writer, reader
        os.remove(tmp_vid_file)
        tmp_vid_file = tmp_vid_file2

    if binauralize:
        print('Binauralizing')
        tmp_snd_file2 = tempfile.mktemp(dir='/tmp/', suffix='.wav')
        ambix, snd_rate = load_wav(tmp_snd_file)
        # Simple stereo fold-down from the first two ambisonics channels
        # (W +/- Y), peak-normalized to 0.95.
        stereo = np.stack(
            [ambix[:, 0] + ambix[:, 1], ambix[:, 0] - ambix[:, 1]], 1)
        stereo /= (np.abs(stereo).max() / 0.95)
        save_wav(tmp_snd_file2, stereo, snd_rate)
        os.remove(tmp_snd_file)
        tmp_snd_file = tmp_snd_file2

    # Re-mux the (possibly processed) audio and video streams.
    print('Mixing')
    cmd = 'ffmpeg -y -i {} -i {} -vcodec copy -strict -2 {}'.format(
        tmp_snd_file, tmp_vid_file, tmp_file)
    print(cmd)
    os.system(cmd)

    cwd = os.getcwd()
    output_fn = os.path.join(cwd, output_fn)
    if inject_meta:
        # Run the bundled spatial-media tool from its own directory (it
        # expects to be invoked there), then restore the cwd.
        print('Injecting metadata')
        file_dir = os.path.dirname(os.path.realpath(__file__))
        spt_media_dir = os.path.realpath(
            os.path.join(file_dir, '3rd-party', 'spatial-media'))
        os.chdir(spt_media_dir)
        os.system('python spatialmedia -i --stereo=none {} {} {} '.format(
            '' if no_spatial_audio else '--spatial-audio', tmp_file, output_fn))
        os.chdir(cwd)
        os.remove(tmp_file)
    else:
        import shutil
        shutil.move(tmp_file, output_fn)

    # Clean up intermediates.
    os.remove(tmp_snd_file)
    os.remove(tmp_vid_file)