def test_virtual_mic():
    """Smoke test for VirtualStereoMic: renders a static and a moving source.

    Plays each result with the `play` command (sox) — requires a sound device;
    reads fixtures from wav_test/.
    """
    from pyutils.iolib.audio import load_wav, save_wav
    mic = VirtualStereoMic()
    mono, rate = load_wav('wav_test/piano.wav')
    mono = mono[:, 0]  # keep only the first channel

    # Static source: one polar position (azimuth/elevation/distance per row
    # — presumably; TODO confirm column order against Position()).
    positions = [[float(num) for num in l.strip().split()]
                 for l in open('wav_test/piano_stat_position.txt', 'r')]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]
    source = PositionalSource(mono, positions[0], rate)
    stereo = mic.binauralize([source])  # whole clip in one shot
    save_wav('/tmp/output.wav', stereo, rate)
    os.system('play /tmp/output.wav')
    os.remove('/tmp/output.wav')

    # Moving source: one position per tic; rendered frame by frame into a
    # pre-allocated stereo buffer.
    positions = [[float(num) for num in l.strip().split()]
                 for l in open('wav_test/piano_mov_position.txt', 'r')]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]
    source = MovingSource(mono, positions, rate)
    stereo = np.zeros((mono.shape[0], 2))
    while source.tic():
        mic.binauralize_frame([source], stereo, source.cur_idx)
    save_wav('/tmp/output.wav', stereo, rate)
    os.system('play /tmp/output.wav')
    os.remove('/tmp/output.wav')
def run(input_fn, position_fn, ambi_order, output_fn):
    """Encode a mono wav into ambisonics at positions read from a text file.

    One position line -> stationary source (encoded in one shot); several
    lines -> moving source (encoded frame by frame). The result is decoded
    to stereo by projection and written to output_fn.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    fmt = AmbiFormat(ambi_order=ambi_order, sample_rate=rate)
    encoder = AmbiEncoder(fmt)

    # Parse one whitespace-separated coordinate triple per line.
    coords = []
    for line in open(position_fn, 'r'):
        coords.append(np.array([float(tok) for tok in line.strip().split()]))
    positions = [Position(c[0], c[1], c[2], 'polar') for c in coords]

    if len(positions) == 1:
        # Stationary source: a single position for the entire clip.
        src = PositionalSource(mono, positions[0], rate)
        ambi = encoder.encode(src)
    else:
        # Moving source: advance one frame per tic and encode incrementally.
        src = MovingSource(mono, positions, rate)
        ambi = AmbisonicArray(np.zeros((mono.shape[0], fmt.num_channels)), fmt)
        while src.tic():
            encoder.encode_frame(src, ambi, src.cur_idx)

    binauralizer = DirectAmbisonicBinauralizer(fmt, method='projection')
    stereo = binauralizer.binauralize(ambi.data)
    save_wav(output_fn, stereo, rate)
def test_source_binauralizer():
    """Smoke test for SourceBinauralizer on synthetic static/moving samples.

    Plays each result with the `play` command (sox) — requires a sound device;
    reads fixtures from wav_test/.
    """
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.iolib.position import read_position_file
    # binauralizer = SourceBinauralizer(use_hrtfs=True, cipic_dir='hrtfs/cipic_subj3')
    binauralizer = SourceBinauralizer(use_hrtfs=False)

    # Static source: take the first position of the first sample id.
    sample = 'wav_test/gen_synthetic-S1'
    positions, wav_fns, _, sample_ids = read_position_file(sample+'-position.txt')
    mono, rate = load_wav(wav_fns[sample_ids[0]])
    source = PositionalSource(mono[:, 0], positions[sample_ids[0]][0], rate)
    stereo = binauralizer.binauralize([source])
    # Peak-normalize before playback.
    save_wav('/tmp/output.wav', stereo / np.abs(stereo).max(), rate)
    os.system('play /tmp/output.wav')
    os.remove('/tmp/output.wav')

    # Moving source: render frame by frame into a pre-allocated buffer.
    sample = 'wav_test/gen_synthetic-M1'
    positions, wav_fns, _, sample_ids = read_position_file(sample+'-position.txt')
    mono, rate = load_wav(wav_fns[sample_ids[0]])
    source = MovingSource(mono[:, 0], positions[sample_ids[0]], rate)
    stereo = np.zeros((mono.shape[0], 2))
    while source.tic():
        binauralizer.binauralize_frame([source], stereo, source.cur_idx)
    save_wav('/tmp/output.wav', stereo / np.abs(stereo).max(), rate)
    os.system('play /tmp/output.wav')
    os.remove('/tmp/output.wav')
def run(input_fn, x, y, z, ambi_order, output_fn):
    """Encode a mono wav placed at a fixed cartesian position into ambisonics.

    Args:
        input_fn: path to the input (mono) wav file.
        x, y, z: cartesian coordinates of the source.
        ambi_order: ambisonics order (num channels = (order + 1)^2).
        output_fn: path where the multi-channel ambisonics wav is written.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        # FIX: warning text previously read "is nor a mono source".
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]
    encoder = AmbiEncoder(AmbiFormat(ambi_order=ambi_order, sample_rate=rate))
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    ambi = encoder.encode(source)
    save_wav(output_fn, ambi.data, rate)
def main(args):
    """Deploy a trained W2XYZ model on a recording and save the predicted
    ambisonics as a wav and/or muxed into a 360 video.

    Expects args to carry: gpu, model_dir, input_folder, deploy_start,
    deploy_duration, save_ambix, save_video, output_fn, video, overlay_map, VR.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = "%d" % args.gpu
    # NOTE(review): tempfile.mktemp is deprecated/race-prone; consider
    # NamedTemporaryFile(delete=False) — kept as-is here.
    tmp_ambix_fn = tempfile.mktemp(prefix='/tmp/', suffix='.wav')
    tmp_video_fn = tempfile.mktemp(prefix='/tmp/', suffix='.mp4')
    model = W2XYZ(args.model_dir)
    ambi_pred = model.deploy(args.input_folder, args.deploy_start, args.deploy_duration)
    # Alternative overlapping-window (Hann cross-fade) deployment, disabled:
    # dur_t = model.model.duration
    # snd1 = model.deploy(args.input_folder, args.deploy_start - dur_t/2, args.deploy_duration + dur_t)
    # hann1 = np.hanning(model.model.snd_dur)
    # hann1 = np.tile(hann1, snd1.shape[0]/hann1.size)[:, np.newaxis]
    # ss = model.model.snd_dur/2
    # t = int(args.deploy_duration * model.params.audio_rate)
    # snd1 = snd1[ss:ss+t]
    # hann1 = hann1[ss:ss+t]
    # snd2 = model.deploy(args.input_folder, args.deploy_start, args.deploy_duration + dur_t)
    # hann2 = np.hanning(model.model.snd_dur)
    # hann2 = np.tile(hann2, snd2.shape[0]/hann2.size)[:, np.newaxis]
    # ss = 0
    # t = int(args.deploy_duration * model.params.audio_rate)
    # snd2 = snd2[ss:ss+t]
    # hann2 = hann2[ss:ss+t]
    # ambi_pred = (snd1 * hann1 + snd2 * hann2) / (hann1 + hann2)

    # Save ambisonics
    save_wav(tmp_ambix_fn, ambi_pred, model.params.audio_rate)
    if args.save_ambix:
        print('Saving ambisonics wav...')
        # Re-encode via ffmpeg (-strict -2 allows the experimental aac/wav codecs).
        cmd = 'ffmpeg -y -i {} -strict -2 {}'.format(tmp_ambix_fn, args.output_fn)
        os.system(cmd)
    if args.save_video:
        print('Saving video...')
        # Cut the matching segment out of the source video.
        cmd = 'ffmpeg -y -ss {} -i {} -t {} {}'.format(args.deploy_start, args.video, args.deploy_duration, tmp_video_fn)
        os.system(cmd)
        # VR mode injects spatial metadata; otherwise binauralize for normal playback.
        myutils.gen_360video(tmp_ambix_fn, tmp_video_fn, args.output_fn,
                             overlay_map=args.overlay_map,
                             inject_meta=args.VR,
                             binauralize=not args.VR)
        os.remove(tmp_video_fn)
    os.remove(tmp_ambix_fn)
def run(input_fn, x, y, z, output_fn, use_hrtfs, hrtf_dir):
    """Binauralize a mono wav placed at a fixed cartesian position.

    Args:
        input_fn: path to the input (mono) wav file.
        x, y, z: cartesian coordinates of the source.
        output_fn: path where the stereo wav is written.
        use_hrtfs: whether to filter with CIPIC HRTFs.
        hrtf_dir: directory holding the CIPIC HRTF data.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        # FIX: warning text previously read "is nor a mono source".
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]
    binauralizer = SourceBinauralizer(use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    # FIX: binauralize takes a list of sources (see the test call sites),
    # previously a bare source was passed.
    stereo = binauralizer.binauralize([source])
    save_wav(output_fn, stereo, rate)
def run(input_fn, output_fn, overwrite=False):
    """Decode an ambisonics wav to stereo via pseudoinverse binauralization.

    Refuses to clobber output_fn unless overwrite=True.
    """
    if overwrite and os.path.exists(output_fn):
        os.remove(output_fn)
    assert not os.path.exists(output_fn)

    ambix, rate = load_wav(input_fn)
    # Channel count is (order + 1)^2, so recover the order from the file.
    order = int(np.sqrt(ambix.shape[1]) - 1)
    fmt = AmbiFormat(ambi_order=order, sample_rate=rate)
    decoder = DirectAmbisonicBinauralizer(fmt, method='pseudoinv')
    save_wav(output_fn, decoder.binauralize(ambix), rate)
def run(input_fn, x, y, z, ambi_order, output_fn):
    """Encode a mono wav at a fixed cartesian position and decode to stereo.

    Args:
        input_fn: path to the input (mono) wav file.
        x, y, z: cartesian coordinates of the source.
        ambi_order: ambisonics order used for the intermediate encoding.
        output_fn: path where the stereo wav is written.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        # FIX: warning text previously read "is nor a mono source".
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]
    encoder = AmbiEncoder(AmbiFormat(ambi_order=ambi_order, sample_rate=rate))
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    ambi = encoder.encode(source)
    binauralizer = DirectAmbisonicBinauralizer(ambi.format, method='projection')
    stereo = binauralizer.binauralize(ambi.data)
    save_wav(output_fn, stereo, rate)
def extract_frames(audio_fn, video_fn, frames_dir, yid, overwrite):
    """Split an ambisonics track into 1-second wav chunks and a video into
    per-frame jpgs under frames_dir/{ambix,video}.

    Existing output directories are only recreated when overwrite is set.
    """
    print('\n' + '=' * 30 + ' ' + yid + ' ' + '=' * 30)

    # Prepare directory tree.
    if not os.path.isdir(frames_dir):
        os.makedirs(frames_dir)
    audio_dir = os.path.join(frames_dir, 'ambix')
    video_dir = os.path.join(frames_dir, 'video')
    for out_dir in (audio_dir, video_dir):
        if os.path.isdir(out_dir):
            if overwrite:
                shutil.rmtree(out_dir)
                os.makedirs(out_dir)
        else:
            os.makedirs(out_dir)

    # Clip to the shorter stream so audio and video stay aligned.
    a_reader = AudioReader(audio_fn)
    v_reader = VideoReader(video_fn)
    duration_secs = int(min(a_reader.duration, v_reader.duration))

    # Ambisonics: one wav chunk per second at 48 kHz.
    print('({}) Splitting ambisonics into chunks'.format(yid))
    sys.stdout.flush()
    reader = AudioReader(audio_fn, rate=48000)
    for sec in range(duration_secs):
        chunk = reader.get_chunk(reader.rate)
        save_wav(os.path.join(audio_dir, '{:06d}.wav'.format(sec)), chunk, reader.rate)

    # Video: one jpg per frame.
    print('({}) Splitting video into frames'.format(yid))
    sys.stdout.flush()
    reader = VideoReader(video_fn)
    num_frames = int(reader.fps * duration_secs)
    for idx in range(num_frames):
        sio.imsave(os.path.join(video_dir, '{:06d}.jpg'.format(idx)), reader.get())
def run(position_fn, ambi_order, output_fn, rate=24000, base_dir=None, randomize=False, overwrite=False):
    """Synthesize an ambisonics scene from a position file describing several
    moving sources plus an (optional) ambient track.

    NOTE(review): `randomize` is accepted but never used — confirm intent.
    """
    if overwrite and os.path.exists(output_fn):
        os.remove(output_fn)
    assert not os.path.exists(output_fn)
    if base_dir is None:
        base_dir = ESC_BASE

    sample_ids, positions, input_fn, _, _ = read_position_file(position_fn)
    source, _ = librosa.load(os.path.join(base_dir, input_fn['source']), sr=rate)
    bkg, _ = librosa.load(os.path.join(base_dir, input_fn['ambient']), sr=rate)
    # Peak short-term power (100 ms moving average) of source and background.
    Psrc = np.convolve(source ** 2, np.ones((int(rate * 0.1),)) / (rate * 0.1)).max()
    Pbkg = np.convolve(bkg ** 2, np.ones((int(rate * 0.1),)) / (rate * 0.1)).max()
    # Scale background to 10% of the source's peak power.
    # NOTE(review): `bkg` is never used after this line — the ambient channel
    # is re-loaded unscaled via data[smp_id] below. Verify this is intended.
    bkg *= 0.1*Psrc/Pbkg

    # Load every sample's waveform, first channel only.
    data = {}
    for smp_id in sample_ids:
        fn = os.path.join(base_dir, input_fn[smp_id])
        mono, _ = librosa.load(fn, sr=rate)
        if mono.ndim == 2:
            mono = mono[:, 0]
        data[smp_id] = mono

    fmt = AmbiFormat(ambi_order=ambi_order, sample_rate=rate)
    encoder = AmbiEncoder(fmt)
    # Samples with positions become moving sources; position-less samples are
    # treated as ambient and mixed into the W channel below.
    sources = [MovingSource(data[smp_id], positions[smp_id], rate) for smp_id in sample_ids if len(positions[smp_id])]
    nframes = max([v.shape[0] for v in data.values()])
    ambix = AmbisonicArray(np.zeros((nframes, fmt.num_channels)), fmt)
    t = -1
    # Encoding stops as soon as the SHORTEST source is exhausted (all()).
    while all([src.tic() for src in sources]):
        t += 1
        encoder.encode_frame(sources, ambix, t)
    ambix = ambix.data

    for smp_id in sample_ids:
        if len(positions[smp_id]) == 0:
            # Ambient sound: add directly to channel 0 (omnidirectional W).
            ambix[:data[smp_id].size, 0] += data[smp_id]
    # Normalize to 0.95 peak.
    # NOTE(review): uses max() rather than np.abs(...).max() — a dominant
    # negative peak would not be normalized; confirm against save_wav range.
    ambix = ambix / ambix.max() * 0.95
    save_wav(output_fn, ambix, rate)
def run(input_fn, position_fn, output_fn, use_hrtfs, hrtf_dir):
    """Binauralize a mono wav at positions read from a text file.

    One position line -> stationary source; several lines -> moving source
    rendered frame by frame.

    Args:
        input_fn: path to the input (mono) wav file.
        position_fn: text file with one polar coordinate triple per line.
        output_fn: path where the stereo wav is written.
        use_hrtfs: whether to filter with CIPIC HRTFs.
        hrtf_dir: directory holding the CIPIC HRTF data.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]
    positions = [[float(num) for num in l.strip().split()] for l in open(position_fn, 'r')]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]
    binauralizer = SourceBinauralizer(use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    if len(positions) == 1:
        # Stationary source.
        source = PositionalSource(mono, positions[0], rate)
        # FIX: binauralize takes a list of sources (see the test call sites),
        # previously a bare source was passed.
        stereo = binauralizer.binauralize([source])
    else:
        source = MovingSource(mono, positions, rate)
        stereo = np.zeros((mono.shape[0], 2))
        while source.tic():
            # FIX: binauralize_frame also takes a list of sources.
            binauralizer.binauralize_frame([source], stereo, source.cur_idx)
    save_wav(output_fn, stereo, rate)
def test_ambisonics_binauralizer():
    """Smoke test: pseudoinverse-decode two first-order ambisonics fixtures
    (static and moving) to stereo and play them.

    Requires a sound device (`play` from sox) and fixtures in wav_test/.
    """
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.ambisonics.common import AmbiFormat

    binauralizer = None
    for sample in ('wav_test/gen_synthetic-S1', 'wav_test/gen_synthetic-M1'):
        ambi, rate = load_wav(sample + '-ambix.wav')
        if binauralizer is None:
            # Build the decoder once, from the first file's sample rate.
            binauralizer = DirectAmbisonicBinauralizer(AmbiFormat(1, rate), method='pseudoinv')
        stereo = binauralizer.binauralize(ambi)
        save_wav('/tmp/output.wav', stereo / np.abs(stereo).max(), rate)
        os.system('play /tmp/output.wav')
        os.remove('/tmp/output.wav')
def gen_360video(audio_fn, video_fn, output_fn, inject_meta=False, overlay_map=False, binauralize=False, no_spatial_audio=False):
    """Mux an ambisonics track with a 360 video, optionally overlaying a
    spherical sound-energy map, binauralizing the audio, and injecting
    spatial-media metadata for VR players.

    Shells out to ffmpeg and (for metadata) the spatial-media tool.
    """
    from pyutils.iolib.video import VideoReader, VideoWriter
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.ambisonics.distance import SphericalAmbisonicsVisualizer
    import tempfile
    from matplotlib import pyplot as plt
    from skimage.transform import resize

    tmp_file = tempfile.mktemp(dir='/tmp/', suffix='.mp4')
    tmp_snd_file = tempfile.mktemp(dir='/tmp/', suffix='.wav')
    tmp_vid_file = tempfile.mktemp(dir='/tmp/', suffix='.mp4')

    # Demux: audio-only wav and video-only mp4 working copies.
    print('Splitting')
    cmd = 'ffmpeg -i {} -vn -strict -2 {}'.format(audio_fn, tmp_snd_file)
    print(cmd)
    os.system(cmd)
    cmd = 'ffmpeg -i {} -an -vcodec copy {}'.format(video_fn, tmp_vid_file)
    print(cmd)
    os.system(cmd)

    if overlay_map:
        # Paint a heat map of directional RMS energy over each video frame.
        print('Overlaying spherical map')
        tmp_vid_file2 = tempfile.mktemp(dir='/tmp/', suffix='.mp4')
        ambix, snd_rate = load_wav(tmp_snd_file)
        reader = VideoReader(tmp_vid_file, rate=10)
        writer = VideoWriter(tmp_vid_file2, reader.fps)
        # Audio is decimated 5x; one RMS map spans 5 video frames.
        ambiVis = SphericalAmbisonicsVisualizer(ambix[::5], snd_rate / 5., 5. / reader.fps, 5.)
        cmap = plt.cm.YlOrRd(np.linspace(0, 1, 256))[:, :3]
        cur_rms = ambiVis.get_next_frame()
        # Normalize to [0, 1] (+0.005 guards against divide-by-zero).
        cur_rms = (cur_rms - cur_rms.min()) / (cur_rms.max() - cur_rms.min() + 0.005)
        while True:
            prev_rms = cur_rms
            cur_rms = ambiVis.get_next_frame()
            if cur_rms is None:
                break
            cur_rms = (cur_rms - cur_rms.min()) / (cur_rms.max() - cur_rms.min() + 0.005)
            # Linearly interpolate between consecutive RMS maps across the
            # 5 video frames they span.
            for i in range(5):
                frame = reader.get()
                if frame is None:
                    break
                beta = i / 5.
                rms = (1 - beta) * prev_rms + beta * cur_rms
                # Contrast stretch; clip negatives so quiet areas stay clear.
                rms = rms * 2. - 0.7
                rms[rms < 0] = 0
                dir_map = (rms * 255).astype(int)
                dir_map[dir_map > 255] = 255
                dir_map = resize(cmap[dir_map], reader.frame_shape[:2]) * 255
                # Alpha-blend the colored map onto the frame (max 60% opacity).
                alpha = resize(rms[:, :, np.newaxis], reader.frame_shape[:2]) * 0.6
                overlay = alpha * dir_map + (1 - alpha) * frame
                writer.write_frame(overlay.astype(np.uint8))
        del writer, reader
        os.remove(tmp_vid_file)
        tmp_vid_file = tmp_vid_file2

    if binauralize:
        # Cheap first-order decode: L = W + Y, R = W - Y (channels 0 and 1).
        print('Binauralizing')
        tmp_snd_file2 = tempfile.mktemp(dir='/tmp/', suffix='.wav')
        ambix, snd_rate = load_wav(tmp_snd_file)
        stereo = np.stack(
            [ambix[:, 0] + ambix[:, 1], ambix[:, 0] - ambix[:, 1]], 1)
        stereo /= (np.abs(stereo).max() / 0.95)  # peak-normalize to 0.95
        save_wav(tmp_snd_file2, stereo, snd_rate)
        os.remove(tmp_snd_file)
        tmp_snd_file = tmp_snd_file2

    # Remux audio and video.
    print('Mixing')
    cmd = 'ffmpeg -y -i {} -i {} -vcodec copy -strict -2 {}'.format(
        tmp_snd_file, tmp_vid_file, tmp_file)
    print(cmd)
    os.system(cmd)

    cwd = os.getcwd()
    output_fn = os.path.join(cwd, output_fn)  # absolute path survives chdir below
    if inject_meta:
        # Run Google's spatial-media injector from its own directory.
        print('Injecting metadata')
        file_dir = os.path.dirname(os.path.realpath(__file__))
        spt_media_dir = os.path.realpath(
            os.path.join(file_dir, '3rd-party', 'spatial-media'))
        os.chdir(spt_media_dir)
        os.system('python spatialmedia -i --stereo=none {} {} {} '.format(
            '' if no_spatial_audio else '--spatial-audio', tmp_file, output_fn))
        os.chdir(cwd)
        os.remove(tmp_file)
    else:
        import shutil
        shutil.move(tmp_file, output_fn)
    os.remove(tmp_snd_file)
    os.remove(tmp_vid_file)