Example #1
import os
import shutil
from multiprocessing import Pool

from spleeter.audio.adapter import get_default_audio_adapter

# NOTE: relies on a module-level `directories` dict of working paths.

def export(srcdirname, destination_dir, format='mp3'):
    """Exports separated music into wav or mp3

    Parameters
    ----------
    srcdirname : str
        Source directory containing the separated files
    destination_dir : str
        Destination directory
    format : {'wav', 'mp3'}
        Output format (codec)
    """
    destination_dir = os.path.join(destination_dir, srcdirname)
    os.makedirs(destination_dir, exist_ok=True)
    audio_adapter = get_default_audio_adapter()
    pool = Pool()
    for track in ('vocals.wav', 'other.wav', 'bass.wav', 'drums.wav',
                  'piano.wav'):
        filepath = os.path.join(directories['tmpsplit'], srcdirname, track)
        if os.path.exists(filepath):
            if format == 'wav':
                shutil.copy2(filepath, destination_dir)
            else:
                data = list(audio_adapter.load(filepath))
                instrument = track.split('.')[0]
                pool.apply_async(
                    audio_adapter.save,
                    (os.path.join(destination_dir,
                                  f'{instrument}.mp3'), *data, 'mp3', '128k'))

    pool.close()
    pool.join()
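A hypothetical call for the function above (the directory names are placeholders):

export('my_song', '/tmp/separated', format='mp3')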
Example #2
import os

import filetype
import librosa

from spleeter.separator import Separator
from spleeter.audio.adapter import get_default_audio_adapter

# get_save_dir, check_files_exist and save_audio are project-local helpers.

def ssp(file_path, sr, cache, out_dir):
    separator = Separator('spleeter:5stems', multiprocess=False)
    audio_loader = get_default_audio_adapter()
    file_path = os.path.join(out_dir, file_path)
    waveform, _ = audio_loader.load(file_path, sample_rate=sr)

    file_name = os.path.split(file_path)[-1].split(
        f'.{filetype.guess(file_path).extension}')[0]
    cache_dir = get_save_dir(out_dir, ['ssp', file_name])
    cache_paths = [
        os.path.join(cache_dir, f'{path}.wav')
        for path in ['track', 'perc', 'bass', 'harm']
    ]
    paths_exist = check_files_exist(cache_paths)

    if cache and paths_exist:
        # Load the cached stems instead of re-separating.
        ssp_audios = [librosa.load(cache_path, sr=sr)[0]
                      for cache_path in cache_paths]
    else:
        # Run the (expensive) separation only on a cache miss.
        prediction = separator.separate(waveform)
        audio = waveform[:, 0]
        perc = prediction['drums'][:, 0]
        bass = prediction['bass'][:, 0]
        harm = (prediction['piano'][:, 0] + prediction['vocals'][:, 0]
                + prediction['other'][:, 0])
        ssp_audios = [audio, perc, bass, harm]

    if cache and not paths_exist:
        # A bare map() is lazy in Python 3 and would never run; iterate
        # explicitly so the stems are actually written to the cache.
        for cache_path, ssp_audio in zip(cache_paths, ssp_audios):
            save_audio(cache_path, ssp_audio, sr)

    return ssp_audios
Example #3
    def separate_to_file(
        self,
        audio_descriptor,
        destination,
        audio_adapter=None,
        offset=0,
        duration=600.0,
        codec="wav",
        bitrate="128k",
        filename_format="{filename}/{instrument}.{codec}",
    ):
        """ Performs source separation and export result to file using
        given audio adapter.
        Filename format should be a Python formattable string that could use
        following parameters : {instrument}, {filename} and {codec}.
        :param audio_descriptor:    Describe song to separate, used by audio
                                    adapter to retrieve and load audio data,
                                    in case of file based audio adapter, such
                                    descriptor would be a file path.
        :param destination:         Target directory to write output to.
        :param audio_adapter:       (Optional) Audio adapter to use for I/O.
        :param offset:              (Optional) Offset of loaded song.
        :param duration:            (Optional) Duration of loaded song.
        :param codec:               (Optional) Export codec.
        :param bitrate:             (Optional) Export bitrate.
        :param filename_format:     (Optional) Filename format.
        """
        if audio_adapter is None:
            # Avoid a shared default adapter created at import time.
            audio_adapter = get_default_audio_adapter()
        waveform, _ = audio_adapter.load(
            audio_descriptor,
            offset=offset,
            duration=duration,
            sample_rate=self._sample_rate,
        )
        sources = self.separate(waveform)
        filename = splitext(basename(audio_descriptor))[0]
        generated = []
        for instrument, data in sources.items():
            formatted_name = filename_format.format(filename=filename,
                                                    instrument=instrument,
                                                    codec=codec)
            path = join(
                destination,
                formatted_name,
            )
            if path in generated:
                raise SpleeterError(
                    (f"Separated source path conflict: {path}, "
                     "please check your filename format"))
            generated.append(path)
            audio_adapter.save(path, data, self._sample_rate, codec, bitrate)

        # TODO: set to something more descriptive later
        zip_path = generate_random_filename(destination, "zip")
        with ZipFile(zip_path, "w") as zip_file:
            for output_path in generated:
                zip_file.write(output_path, arcname=basename(output_path))
        with app.app_context():
            return url_for("separated", filename=basename(zip_path))
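For reference, the stock spleeter Separator.separate_to_file (without the zip/URL additions of this variant) can be driven the same way; a minimal sketch with placeholder paths:

from spleeter.separator import Separator

separator = Separator('spleeter:2stems')
separator.separate_to_file(
    'audio_example.mp3',   # placeholder input file
    'output/',
    codec='mp3',
    bitrate='192k',
    filename_format='{filename}_{instrument}.{codec}')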
Example #4
from spleeter.separator import Separator
from spleeter.audio.adapter import get_default_audio_adapter

def splitter(path):
    '''Takes a file name from youtube.py and separates it into 5 stems:
    vocals, drums, bass, piano, and other.'''

    separator = Separator('spleeter:5stems')
    audio_loader = get_default_audio_adapter()
    waveform, rate = audio_loader.load(path, sample_rate=None)
    prediction = separator.separate(waveform)
    # tools.clear_wavs()
    return prediction, rate
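A sketch of how the returned prediction could be written to disk (the input path and output names are illustrative):

from spleeter.audio.adapter import get_default_audio_adapter

prediction, rate = splitter('downloads/audio.mp3')  # placeholder path
adapter = get_default_audio_adapter()
for instrument, data in prediction.items():
    adapter.save(f'{instrument}.wav', data, rate, 'wav')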
Example #5
    def Remove_background_music(self, inpath):
        # Use audio loader explicitly for loading the audio waveform:
        # from spleeter.audio.adapter import get_default_audio_adapter
        audio_loader = get_default_audio_adapter()
        sample_rate = 44100
        waveform, _ = audio_loader.load(inpath, sample_rate=sample_rate)
        # Perform the separation. NOTE: assumes a local 2-stem separator
        # (vocals vs. accompaniment); the surrounding class may define its own.
        separator = Separator('spleeter:2stems')
        prediction = separator.separate(waveform)
        print("Remove background music done!")
        # Return the vocals stem, i.e. the mix without its background music.
        return prediction['vocals']
Example #6
    def __init__(self, bitrate=256):
        """Default constructor.

        :param bitrate: Output bitrate in kbit/s, defaults to 256
        """
        self.audio_bitrate = f'{bitrate}k'
        self.audio_format = 'mp3'
        self.sample_rate = 44100
        self.spleeter_stem = 'config/4stems-16kHz.json'
        # Use librosa backend as it is less memory intensive
        self.separator = Separator(self.spleeter_stem,
                                   stft_backend='librosa',
                                   multiprocess=False)
        self.audio_adapter = get_default_audio_adapter()
Example #7
import gc
import os

from scipy.io import wavfile
from spleeter.separator import Separator
from spleeter.audio.adapter import get_default_audio_adapter

def separate_one_audio_on_accompaniment_and_vocals_by_spleeter(
        path_to_audio, sample_rate, output_directory):
    audio_loader = get_default_audio_adapter()
    separator = Separator('spleeter:2stems')
    # Handle both '/' and '\\' path separators.
    filename = path_to_audio.split('/')[-1].split('\\')[-1]
    stem = '.'.join(filename.split('.')[:-1])
    waveform, _ = audio_loader.load(path_to_audio, sample_rate=sample_rate)
    # Perform the separation:
    prediction = separator.separate(waveform)
    accompaniment = prediction['accompaniment']
    vocals = prediction['vocals']
    # os.path.join avoids breakage when output_directory lacks a trailing slash.
    wavfile.write(os.path.join(output_directory, f'{stem}_accompaniment.wav'),
                  sample_rate, accompaniment)
    wavfile.write(os.path.join(output_directory, f'{stem}_vocals.wav'),
                  sample_rate, vocals)
    del audio_loader, separator, waveform, prediction, accompaniment, vocals
    gc.collect()
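A hypothetical invocation (paths are placeholders):

separate_one_audio_on_accompaniment_and_vocals_by_spleeter(
    'songs/track01.mp3', 44100, 'output')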
Example #8
def split_it():
    separator = Separator('spleeter:4stems')

    # Using a custom configuration file:
    # separator = Separator('/path/to/config.json')

    # separate_to_file loads the audio itself, so no explicit
    # adapter/waveform loading is needed here.
    separator.separate_to_file('static/audio/audio.mp3', 'static/')
    print('done', os.getcwd())
Example #9
def generate_fake_eval_dataset(path):
    aa = get_default_audio_adapter()
    n_songs = 2
    fs = 44100
    duration = 3
    n_channels = 2
    rng = np.random.RandomState(seed=0)
    for song in range(n_songs):
        song_path = join(path, "test", f"song{song}")
        makedirs(song_path, exist_ok=True)
        for instr in ["mixture", "vocals", "bass", "drums", "other"]:
            filename = join(song_path, f"{instr}.wav")
            data = rng.rand(duration * fs, n_channels) - 0.5
            aa.save(filename, data, fs)
Example #10
    def __init__(self, config=None):
        if config is None:
            self.audio_bitrate = '256k'
            self.audio_format = 'mp3'
            self.sample_rate = 44100
            self.spleeter_stem = 'config/4stems-16kHz.json'
        else:
            self.audio_bitrate = config['audio_bitrate']
            self.audio_format = config['audio_format']
            self.sample_rate = config['sample_rate']
            self.spleeter_stem = config['spleeter_stem']
        self.separator = Separator(self.spleeter_stem,
                                   stft_backend='librosa',
                                   multiprocess=False)
        self.audio_adapter = get_default_audio_adapter()
Example #11
def test_separate(configuration, instruments):
    """ Test separation from raw data. """
    adapter = get_default_audio_adapter()
    waveform, _ = adapter.load(TEST_AUDIO_DESCRIPTOR)
    separator = Separator(configuration)
    prediction = separator.separate(waveform)
    assert len(prediction) == len(instruments)
    for instrument in instruments:
        assert instrument in prediction
    for instrument in instruments:
        track = prediction[instrument]
        assert not (waveform == track).all()
        for compared in instruments:
            if instrument != compared:
                assert not (track == prediction[compared]).all()
Example #12
def test_separate(configuration, instruments, backend):
    """ Test separation from raw data. """
    adapter = get_default_audio_adapter()
    waveform, _ = adapter.load(TEST_AUDIO_DESCRIPTOR)
    separator = Separator(configuration, stft_backend=backend)
    prediction = separator.separate(waveform, TEST_AUDIO_DESCRIPTOR)
    assert len(prediction) == len(instruments)
    for instrument in instruments:
        assert instrument in prediction
    for instrument in instruments:
        track = prediction[instrument]
        assert waveform.shape == track.shape
        assert not np.allclose(waveform, track)
        for compared in instruments:
            if instrument != compared:
                assert not np.allclose(track, prediction[compared])
Example #13
    def __init__(self,
                 videoPath,
                 duration,
                 before,
                 after,
                 multiThread,
                 parent=None):
        super(separateQThread, self).__init__(parent)
        self.videoPath = videoPath
        self.duration = duration
        self.beforeCnt = int(before) // 20
        self.afterCnt = int(after) // 20
        self.separate = Separator('spleeter:2stems',
                                  stft_backend='tensorflow',
                                  multiprocess=multiThread)
        self.audioLoader = get_default_audio_adapter()
Example #14
def test_separate(test_file, configuration, backend):
    """ Test separation from raw data. """
    instruments = MODEL_TO_INST[configuration]
    adapter = get_default_audio_adapter()
    waveform, _ = adapter.load(test_file)
    separator = Separator(configuration, stft_backend=backend, multiprocess=False)
    prediction = separator.separate(waveform, test_file)
    assert len(prediction) == len(instruments)
    for instrument in instruments:
        assert instrument in prediction
    for instrument in instruments:
        track = prediction[instrument]
        assert waveform.shape[:-1] == track.shape[:-1]
        assert not np.allclose(waveform, track)
        for compared in instruments:
            if instrument != compared:
                assert not np.allclose(track, prediction[compared])
Example #15
async def initiation(kutana):
    from spleeter.audio.adapter import get_default_audio_adapter
    from spleeter.separator import Separator

    environ["GITHUB_REPOSITORY"] = "bruce-willis/Audio-denoiser"
    environ["GITHUB_RELEASE"] = "v0.1"

    config_url = "https://raw.githubusercontent.com/bruce-willis/Audio-denoiser/develop/src/training/config/voice_config.json"
    config_path = "voice_config.json"
    _ = urllib.request.urlretrieve(url=config_url, filename=config_path)

    separator = Separator(config_path)
    predictor = separator._get_predictor()
    plugin.predictor = predictor

    adapter = get_default_audio_adapter()
    plugin.adapter = adapter
Example #16
def execute(args):
    try:
        logger.info('Speech recognition processing started: {0}',
                    args.audio_file,
                    decoration=MLogger.DECORATION_BOX)

        if not os.path.exists(args.audio_file):
            logger.error("The specified audio file path does not exist.\n{0}",
                         args.audio_file,
                         decoration=MLogger.DECORATION_BOX)
            return False

        # Parent path (if none is given, the directory containing the audio;
        # on Colab output is created locally, so it is expected to be given).
        base_path = str(pathlib.Path(args.audio_file).parent
                        ) if not args.parent_dir else args.parent_dir

        audio_adapter = get_default_audio_adapter()
        sample_rate = 44100
        waveform, _ = audio_adapter.load(args.audio_file,
                                         sample_rate=sample_rate)

        # Split into vocals and accompaniment
        separator = Separator('spleeter:2stems')

        # Perform the separation:
        prediction = separator.separate(waveform)

        # Vocal data
        vocals = prediction['vocals']

        audio_adapter.save(f"{base_path}/vocals.wav", vocals,
                           separator._sample_rate, "wav", "16k")

        logger.info('Speech recognition processing finished: {0}',
                    base_path,
                    decoration=MLogger.DECORATION_BOX)

        return True
    except Exception as e:
        logger.critical("An unexpected error occurred during speech recognition.",
                        e,
                        decoration=MLogger.DECORATION_BOX)
        return False
Example #17
def test_separator_backends(test_file):
    adapter = get_default_audio_adapter()
    waveform, _ = adapter.load(test_file)

    separator_lib = Separator("spleeter:2stems", stft_backend="librosa")
    separator_tf = Separator("spleeter:2stems", stft_backend="tensorflow")

    # Test that the STFT and its inverse give near-exact reconstruction
    stft_matrix = separator_lib._stft(waveform)
    reconstructed = separator_lib._stft(
        stft_matrix, inverse=True, length=waveform.shape[0])
    assert np.allclose(reconstructed, waveform, atol=3e-2)

    # Compare both separations; they should be close
    out_tf = separator_tf._separate_tensorflow(waveform, test_file)
    out_lib = separator_lib._separate_librosa(waveform, test_file)

    for instrument in out_lib.keys():
        # test that both outputs are close everywhere
        assert np.allclose(out_tf[instrument], out_lib[instrument], atol=1e-5)
Example #18
    def __init__(self, config=None):
        """Default constructor.

        :param config: Separator config, defaults to None
        """
        if config is None:
            self.audio_bitrate = '256k'
            self.audio_format = 'mp3'
            self.sample_rate = 44100
            self.spleeter_stem = 'config/4stems-16kHz.json'
        else:
            self.audio_bitrate = config['audio_bitrate']
            self.audio_format = config['audio_format']
            self.sample_rate = config['sample_rate']
            self.spleeter_stem = config['spleeter_stem']
        # Use librosa backend as it is less memory intensive
        self.separator = Separator(self.spleeter_stem,
                                   stft_backend='librosa',
                                   multiprocess=False)
        self.audio_adapter = get_default_audio_adapter()
Example #19
    def __init__(self, stems: int, chunk_size=2):
        """
            Args:
                stems (int): number of stems to separate into (2/4/5).
                chunk_size (int): chunk size (in minutes) giving the
                    duration of each individual chunk after splitting.
                NOTE: Longer audio files take more memory. Hence, splitting
                    the audio into chunks is a workaround.
        """

        # The stem count selects which pretrained model to load.
        self.stems = stems
        # Convert minutes to seconds.
        self.chunk_size = int(chunk_size * 60)

        self._separator = Separator(f"spleeter:{self.stems}stems")

        # spleeter-specific config
        self._audio_adapter = get_default_audio_adapter()
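The chunked separation the docstring describes could look like the following sketch; separate_chunks and its assumptions (stereo NumPy waveform, np is numpy) are illustrative, not part of the original class:

    def separate_chunks(self, waveform, sample_rate):
        """Separate a long waveform chunk by chunk to bound memory use."""
        samples_per_chunk = self.chunk_size * sample_rate
        stems = {}
        for start in range(0, waveform.shape[0], samples_per_chunk):
            # Separate one chunk_size-long window at a time.
            prediction = self._separator.separate(
                waveform[start:start + samples_per_chunk])
            for instrument, data in prediction.items():
                # Append each chunk's output to the stem built so far.
                stems[instrument] = (
                    data if instrument not in stems
                    else np.concatenate([stems[instrument], data]))
        return stems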
Example #20
    def __init__(self,
                 cpu_separation: bool,
                 bitrate=256,
                 softmask=False,
                 alpha=1.0,
                 iterations=1):
        """Default constructor.
        :param config: Separator config, defaults to None
        """
        if cpu_separation:
            raise ValueError('X-UMX only works with GPU. Task aborted.')

        self.model_file = 'x-umx.h5'
        self.model_dir = Path('pretrained_models')
        self.model_file_path = self.model_dir / self.model_file
        self.context = 'cudnn'
        self.softmask = softmask
        self.alpha = alpha
        self.iterations = iterations
        self.bitrate = bitrate
        self.sample_rate = 44100
        self.residual_model = False
        self.audio_adapter = get_default_audio_adapter()
Example #21
def generate_fake_training_dataset(path, instrument_list=["vocals", "other"]):
    """
        Generates a fake training dataset in path:
        - generates audio files
        - generates a csv file describing the dataset
    """
    aa = get_default_audio_adapter()
    n_songs = 2
    fs = 44100
    duration = 6
    n_channels = 2
    rng = np.random.RandomState(seed=0)
    dataset_df = pd.DataFrame(
        columns=["mix_path"]
        + [f"{instr}_path" for instr in instrument_list]
        + ["duration"])
    for song in range(n_songs):
        song_path = join(path, "train", f"song{song}")
        makedirs(song_path, exist_ok=True)
        dataset_df.loc[song, "duration"] = duration
        for instr in instrument_list + ["mix"]:
            filename = join(song_path, f"{instr}.wav")
            data = rng.rand(duration * fs, n_channels) - 0.5
            aa.save(filename, data, fs)
            dataset_df.loc[song, f"{instr}_path"] = join(
                "train", f"song{song}", f"{instr}.wav")

    dataset_df.to_csv(join(path, "train", "train.csv"), index=False)
Example #22
def adapter():
    """ Target test audio adapter fixture. """
    return get_default_audio_adapter()
Example #23
import numpy as np
import tensorflow as tf
from tensorflow.signal import stft, hann_window

from spleeter.separator import Separator
from spleeter.audio.adapter import get_default_audio_adapter

# `args` comes from an argparse parser defined earlier in the script.
filename = args.input
outputname = args.output
if '/' not in outputname:
    outputname = './' + outputname
modelname = args.params_filename

# load model
print("load model")
separator = Separator(modelname, stft_backend='tensorflow')
separator._params['attack'] = 4
predictor = separator._get_predictor()

# load audio
print("load audio")
audio_loader = get_default_audio_adapter()
sample_rate = 44100
waveform, _ = audio_loader.load(filename, sample_rate=sample_rate)
print(waveform.dtype)
print("max amplitude: {}".format(np.max(np.abs(waveform))))

# compute spectrogram
print("compute stft")
frame_length = separator._params['frame_length']
frame_step = separator._params['frame_step']

with predictor.graph.as_default():
    stft_feature = tf.transpose(
        stft(tf.transpose(waveform),
             frame_length,
             frame_step,
             # window/pad arguments follow spleeter's own Separator._stft
             window_fn=lambda fl, dtype: hann_window(
                 fl, periodic=True, dtype=dtype),
             pad_end=True),
        perm=[1, 2, 0])
Example #24
def audio_data():
    waveform, _ = get_default_audio_adapter().load(TEST_AUDIO_DESCRIPTOR,
                                                   TEST_OFFSET, TEST_DURATION,
                                                   TEST_SAMPLE_RATE)
    return waveform