def test_nistems():
    """Check that ``NIStemsWriter`` output carries the default stem metadata.

    The example stems are written to a temporary ``.m4a`` file, ``MP4Box`` is
    invoked with ``-dump-udta 0:stem`` on it, and the JSON found in the
    resulting ``<name>_stem.udta`` file is compared (via ``ordered``) with the
    JSON loaded from ``stempeg.default_metadata()``.

    Raises:
        subprocess.CalledProcessError: if the ``MP4Box`` call exits non-zero.
    """
    mp4exc = stempeg.cmds.find_cmd("MP4Box")

    stems, rate = stempeg.read_stems(stempeg.example_stem_path())
    with tmp.NamedTemporaryFile(delete=False, suffix='.m4a') as tempfile:
        stempeg.write_stems(
            tempfile.name,
            stems,
            sample_rate=rate,
            writer=stempeg.NIStemsWriter()
        )
        sp.check_call([mp4exc, "-dump-udta", "0:stem", tempfile.name])

        # The dump is expected next to the temporary file as "<root>_stem.udta".
        root, _ = os.path.splitext(tempfile.name)
        udta_file = root + "_stem.udta"
        with open(stempeg.default_metadata()) as f:
            d_metadata = json.load(f)

        try:
            # First attempt: skip the leading 8 bytes before parsing
            # (presumably a box header in front of the JSON -- TODO confirm).
            # The handle is now closed deterministically instead of leaking.
            with codecs.open(udta_file, encoding="utf-8") as file_obj:
                file_obj.seek(8)
                l_metadata = json.load(file_obj)
        except json.decoder.JSONDecodeError:
            # Fallback: the dump may already be plain JSON from byte 0.
            with open(udta_file) as json_file:
                l_metadata = json.load(json_file)

        assert ordered(l_metadata) == ordered(d_metadata)
def test_shape(nb_samples):
    """Round-trip random audio through a multi-stream file and check its shape.

    A random array of shape ``(5, nb_samples, 2)`` is written with
    ``StreamsWriter`` and read back; the first and last axes must be
    preserved and the middle axis must be a multiple of 1024.
    """
    out_path = "./random.stem.m4a"
    written = np.random.random((5, nb_samples, 2))
    stempeg.write_stems(out_path, written, writer=stempeg.StreamsWriter())

    loaded, _rate = stempeg.read_stems(out_path)
    assert loaded.shape[0] == written.shape[0]
    assert loaded.shape[2] == written.shape[2]
    # The sample axis is only checked for 1024-alignment, not for equality
    # (presumably the codec pads to whole frames -- TODO confirm).
    assert loaded.shape[1] % 1024 == 0
def test_multifileformats(audio, multifile_format, nb_stems):
    """Write ``audio`` with ``FilesWriter`` using one name per stem.

    The stem names are the string forms of ``0 .. nb_stems - 1`` and the
    target path is a temporary file whose suffix is ``multifile_format``.
    The test only checks that writing does not raise.
    """
    suffix = '.' + multifile_format
    with tmp.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        names = list(map(str, range(nb_stems)))
        files_writer = stempeg.FilesWriter(stem_names=names)
        stempeg.write_stems(
            handle.name,
            audio,
            sample_rate=44100,
            writer=files_writer,
        )
def test_shape(nb_samples):
    """Round-trip random audio through ``./random.stem.mp4`` and check shape.

    A random array of shape ``(5, nb_samples, 2)`` is written and read back;
    the first and last axes must be preserved and the middle axis must be a
    multiple of 1024.
    """
    out_path = "./random.stem.mp4"
    written = np.random.random((5, nb_samples, 2))
    # NOTE(review): the array is passed first and the path second here --
    # confirm this matches the installed stempeg ``write_stems`` signature.
    stempeg.write_stems(written, out_path)

    loaded, _rate = stempeg.read_stems(out_path)
    assert loaded.shape[0] == written.shape[0]
    assert loaded.shape[2] == written.shape[2]
    assert loaded.shape[1] % 1024 == 0
def test_ffmpeg_errors(audio):
    """Expect a ``RuntimeError`` when 3-d audio is stream-written to ``.wav``.

    Inputs whose ``ndim`` is not 3 are not exercised by this test.
    """
    if audio.ndim != 3:
        return

    with pytest.raises(RuntimeError), tmp.NamedTemporaryFile(
        delete=False, suffix='.wav'
    ) as handle:
        stempeg.write_stems(
            handle.name,
            audio,
            sample_rate=44100,
            writer=stempeg.StreamsWriter(),
        )
def test_multichannel_containers(audio, nb_channels, multichannel_format):
    """Round-trip ``audio`` through a multichannel container.

    The audio is written with ``ChannelsWriter`` to a temporary file with the
    given format suffix, read back with a ``ChannelsReader`` configured for
    ``nb_channels`` (and ``always_3d=True``), and the shapes must match.
    """
    suffix = '.' + multichannel_format
    with tmp.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        stempeg.write_stems(
            handle.name,
            audio,
            sample_rate=44100,
            writer=ChannelsWriter(),
        )
        channels_reader = stempeg.ChannelsReader(nb_channels=nb_channels)
        loaded_audio, _rate = stempeg.read_stems(
            handle.name,
            always_3d=True,
            reader=channels_reader,
        )
        assert audio.shape == loaded_audio.shape
def test_multistream_containers(audio, multistream_format, nb_stems):
    """Round-trip ``audio`` through a multi-stream container.

    Only runs for ``nb_stems > 1``. The audio is written with an AAC
    ``StreamsWriter`` whose stem names are ``"0" .. str(nb_stems - 1)``, read
    back with ``always_3d=True``, and the shapes must match. For the ``m4a``
    format the stream titles reported by ``stempeg.Info`` must additionally
    equal the names that were written.
    """
    if nb_stems <= 1:
        return

    with tmp.NamedTemporaryFile(
        delete=False,
        suffix='.' + multistream_format
    ) as tempfile:
        stem_names = [str(k) for k in range(nb_stems)]
        stempeg.write_stems(
            tempfile.name,
            audio,
            sample_rate=44100,
            writer=stempeg.StreamsWriter(
                codec='aac',
                stem_names=stem_names
            )
        )
        loaded_audio, rate = stempeg.read_stems(
            tempfile.name,
            always_3d=True
        )
        assert audio.shape == loaded_audio.shape

        if multistream_format == "m4a":
            info = stempeg.Info(tempfile.name)
            loaded_stem_names = info.title_streams
            # Check that the titles could be extracted. Compare whole lists:
            # a zip()-based comparison stops at the shorter sequence and
            # would pass even if no titles were read back at all.
            assert list(loaded_stem_names) == stem_names
import stempeg

# Stem index legend:
#   0 - The mixture,
#   1 - The drums,
#   2 - The bass,
#   3 - The rest of the accompaniment,
#   4 - The vocals.

# example: read the bundled example stems and write them as a stream file
S, rate = stempeg.read_stems(stempeg.example_stem_path())
stempeg.write_stems(
    "output.mp4",
    S,
    sample_rate=rate,
    writer=stempeg.StreamsWriter(),
)

# NOTE(review): the file above is written to a relative path but read back
# from an absolute, machine-specific one -- confirm they refer to the same file.
S, rate = stempeg.read_stems(
    "C:/Users/hahla/Downloads/output.mp4",
    stem_id=[0],
)

# Hard-coded locations of the individual audio files.
stems_folder = "D:/Development/github/GAN-tests/audio_files_split/audio_files_001"
filename_mix = "D:/Development/github/GAN-tests/audio_files_split/audio_file_mixture_0002.wav"
filename_drums = "D:/Development/github/GAN-tests/audio_files_split/audio_file_hits_0002.wav"
# NOTE(review): bass and vocals point at the same "soundless" file --
# presumably intentional placeholders; verify.
filename_bass = "D:/Development/github/GAN-tests/audio_files_split/audio_file_soundless_audio_0002.wav"
filename_other = "D:/Development/github/GAN-tests/audio_files_split/audio_file_background_0002.wav"
filename_vocals = "D:/Development/github/GAN-tests/audio_files_split/audio_file_soundless_audio_0002.wav"

S_filename_mix, rate = stempeg.read_stems(filename_mix, stem_id=0)
def test_write():
    """Read the bundled test stem file and write it back to ``./stems.mp4``.

    The test only checks that reading and writing do not raise.
    """
    source_path = "tests/data/The Easton Ellises - Falcon 69.stem.mp4"
    stems, _rate = stempeg.read_stems(source_path)
    # NOTE(review): the array is passed first and the path second here --
    # confirm this matches the installed stempeg ``write_stems`` signature.
    stempeg.write_stems(stems, "./stems.mp4")
args = parser.parse_args()

# load stems
stems, rate = stempeg.read_stems(args.input)

# load stems,
# resample to 96000 Hz,
# use multiprocessing
stems, rate = stempeg.read_stems(
    args.input,
    sample_rate=96000,
    multiprocess=True,
)

# --> stems now has `shape=(stem x samples x channels)`

# save stems from tensor as multi-stream mp4
stempeg.write_stems("test.stem.m4a", stems, sample_rate=96000)

# save stems as dict for convenience: the first five entries along the
# first axis are given names, in this order
stems = {
    name: stems[index]
    for index, name in enumerate(("mix", "drums", "bass", "other", "vocals"))
}

# keys will be automatically used
# from dict as files
stempeg.write_stems("test.stem.m4a", data=stems, sample_rate=96000)

# `write_stems` is a preset for the following settings
def test_write():
    """Read the example stem file and write it back to ``./stems.mp4``.

    The test only checks that reading and writing do not raise.
    """
    example_path = stempeg.example_stem_path()
    stems, _rate = stempeg.read_stems(example_path)
    # NOTE(review): the array is passed first and the path second here --
    # confirm this matches the installed stempeg ``write_stems`` signature.
    stempeg.write_stems(stems, "./stems.mp4")
def _build_separate_parser():
    """Create the argument parser used by ``separate``."""
    parser = argparse.ArgumentParser(
        description="UMX Inference",
        add_help=True,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    parser.add_argument("input", type=str, nargs="+", help="List of paths to wav/flac files.")

    parser.add_argument(
        "--model",
        default="umxhq",
        type=str,
        help="path to mode base directory of pretrained models",
    )

    parser.add_argument(
        "--targets",
        nargs="+",
        type=str,
        help="provide targets to be processed. "
        "If none, all available targets will be computed",
    )

    parser.add_argument(
        "--outdir",
        type=str,
        help="Results path where audio evaluation results are stored",
    )

    parser.add_argument(
        "--ext",
        type=str,
        default=".wav",
        help="Output extension which sets the audio format",
    )

    parser.add_argument("--start", type=float, default=0.0, help="Audio chunk start in seconds")

    parser.add_argument(
        "--duration",
        type=float,
        help="Audio chunk duration in seconds, negative values load full track",
    )

    parser.add_argument("--no-cuda", action="store_true", default=False, help="disables CUDA inference")

    parser.add_argument(
        "--audio-backend",
        type=str,
        default="sox_io",
        help="Set torchaudio backend "
        "(`sox_io`, `sox`, `soundfile` or `stempeg`), defaults to `sox_io`",
    )

    parser.add_argument(
        "--niter",
        type=int,
        default=1,
        help="number of iterations for refining results.",
    )

    parser.add_argument(
        "--wiener-win-len",
        type=int,
        default=300,
        help="Number of frames on which to apply filtering independently",
    )

    parser.add_argument(
        "--residual",
        type=str,
        default=None,
        # Trailing space added: adjacent literals are concatenated verbatim.
        help="if provided, build a source with given name "
        "for the mix minus all estimated targets",
    )

    parser.add_argument(
        "--aggregate",
        type=str,
        default=None,
        help="if provided, must be a string containing a valid expression for "
        "a dictionary, with keys as output target names, and values "
        "a list of targets that are used to build it. For instance: "
        '\'{"vocals":["vocals"], "accompaniment":["drums",'
        '"bass","other"]}\'',
    )

    parser.add_argument(
        "--filterbank",
        type=str,
        default="torch",
        # argparse %-formats help strings, so a literal percent sign must be
        # written as "%%"; an unescaped "% f" breaks `--help`.
        help="filterbank implementation method. "
        "Supported: `['torch', 'asteroid']`. `torch` is ~30%% faster "
        "compared to `asteroid` on large FFT sizes such as 4096. However "
        "asteroids stft can be exported to onnx, which makes is practical "
        "for deployment.",
    )
    return parser


def separate():
    """Command line entry point: separate each input file and save the estimates.

    Arguments are read from ``sys.argv``. A separator is loaded once via
    ``utils.load_separator`` and reused for every input path. For each input
    the audio is loaded (through ``stempeg`` when ``--audio-backend stempeg``
    is selected, otherwise through ``data.load_audio``), passed to
    ``predict.separate``, and the resulting estimates are written below an
    output directory:

    * ``<outdir>/<input stem>`` when ``--outdir`` is given, otherwise
    * ``<input stem>_<model>`` in the current directory, where ``<model>`` is
      the ``--model`` string itself if it is not an existing path, or the
      stem of that path if it is.

    Raises:
        RuntimeError: if the ``stempeg`` backend is requested but the package
            cannot be imported.
        SystemExit: raised by ``argparse`` for ``--help`` or invalid arguments.
    """
    args = _build_separate_parser().parse_args()

    # "stempeg" is not a torchaudio backend; it is handled separately below.
    if args.audio_backend != "stempeg":
        torchaudio.set_audio_backend(args.audio_backend)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print("Using ", device)

    # parsing the output dict
    aggregate_dict = None if args.aggregate is None else json.loads(args.aggregate)

    # create separator only once to reduce model loading
    # when using multiple files
    separator = utils.load_separator(
        model_str_or_path=args.model,
        targets=args.targets,
        niter=args.niter,
        residual=args.residual,
        wiener_win_len=args.wiener_win_len,
        device=device,
        pretrained=True,
        filterbank=args.filterbank,
    )

    separator.freeze()
    separator.to(device)

    if args.audio_backend == "stempeg":
        try:
            import stempeg
        except ImportError as err:
            raise RuntimeError("Please install pip package `stempeg`") from err

    # loop over the files
    for input_file in args.input:
        if args.audio_backend == "stempeg":
            audio, rate = stempeg.read_stems(
                input_file,
                start=args.start,
                duration=args.duration,
                sample_rate=separator.sample_rate,
                dtype=np.float32,
            )
            audio = torch.tensor(audio)
        else:
            audio, rate = data.load_audio(input_file, start=args.start, dur=args.duration)

        estimates = predict.separate(
            audio=audio,
            rate=rate,
            aggregate_dict=aggregate_dict,
            separator=separator,
            device=device,
        )

        if not args.outdir:
            model_path = Path(args.model)
            if not model_path.exists():
                # --model is a name rather than a path: use it verbatim.
                outdir = Path(Path(input_file).stem + "_" + args.model)
            else:
                outdir = Path(Path(input_file).stem + "_" + model_path.stem)
        else:
            outdir = Path(args.outdir) / Path(input_file).stem
        outdir.mkdir(exist_ok=True, parents=True)

        # write out estimates
        if args.audio_backend == "stempeg":
            target_path = str(outdir / Path("target").with_suffix(args.ext))
            # convert torch dict to numpy dict
            estimates_numpy = {}
            for target, estimate in estimates.items():
                estimates_numpy[target] = torch.squeeze(estimate).detach().cpu().numpy().T

            stempeg.write_stems(
                target_path,
                estimates_numpy,
                sample_rate=separator.sample_rate,
                writer=stempeg.FilesWriter(multiprocess=True, output_sample_rate=rate),
            )
        else:
            for target, estimate in estimates.items():
                target_path = str(outdir / Path(target).with_suffix(args.ext))
                torchaudio.save(
                    target_path,
                    torch.squeeze(estimate).to("cpu"),
                    sample_rate=separator.sample_rate,
                )
"""Open a stem file and save (re-encode) it back to a stem file."""
import argparse
import stempeg
import numpy as np
from os import path as op


if __name__ == '__main__':
    cli_parser = argparse.ArgumentParser()
    cli_parser.add_argument('input')
    args = cli_parser.parse_args()

    # Read the stems from the path given on the command line.
    stems, rate = stempeg.read_stems(args.input)
    print(stems.shape)

    # NOTE(review): the array is passed first and the path second here --
    # confirm this matches the installed stempeg ``write_stems`` signature.
    stempeg.write_stems(stems, "stems.mp4")

    # Read the freshly written file back and show its shape for comparison.
    stems2, rate = stempeg.read_stems("stems.mp4")
    print(stems2.shape)