# Example #1
def get_vocal(mix_file, bg_file, lyric_file, out_file="out.wav"):
    """Subtract a backing track from a full mix and write the residual as WAV.

    Args:
        mix_file: Path of the audio file containing vocal plus accompaniment.
        bg_file: Path of the accompaniment-only audio file.
        lyric_file: Path of the lyric file handed to ``lyric_parser.Lyric``;
            it supplies the time before the vocal starts.
        out_file: Path the resulting signal is written to.

    Returns:
        None. The result is written to ``out_file``.
    """
    print("\ndeal with ---", bg_file, "---")

    # Load both recordings at their native sample rates.
    # NOTE(review): only the rate reported for ``bg_file`` is kept; the rate
    # of ``mix_file`` is discarded without being compared -- confirm that
    # callers always pass files with matching rates.
    mixture, _ = librosa.load(mix_file, sr=None)
    backing, sample_rate = librosa.load(bg_file, sr=None)

    # Time before the vocal starts, as reported by the lyric file.
    intro_ms = lyric_parser.Lyric(lyric_file).get_time_before_vocal()

    # Unit conversion from milliseconds to samples (1000 ms are taken off
    # first), then halved.
    # NOTE(review): nothing guards against an intro shorter than 1000 ms.
    window = ms2sample(intro_ms - 1000, sample_rate) // 2

    # Pre-processing of the beginning of both signals.
    mixture = mute_start(mixture)
    backing = mute_start(backing)

    # Estimate the time shift and the volume ratio between the two signals.
    shift, gain = compute_t_v(mixture, backing, window)

    # Pad both signals to the same length, then shift the mix.
    mixture, backing = npp.pad_the_same(mixture, backing)
    aligned = npp.right_shift(mixture, shift)

    # Signal subtraction: scaled, shifted mix minus accompaniment.
    vocal = aligned * gain - backing

    # Output.
    librosa.output.write_wav(out_file, vocal, sample_rate)
def _tagged_output_path(out_file, subdir, prefix):
    """Return ``<dir of out_file>/<subdir>/<prefix><basename of out_file>``.

    The sub-directory is created when it is missing so the caller can write
    to the returned path directly.
    """
    directory = os.path.join(os.path.dirname(out_file), subdir)
    os.makedirs(directory, exist_ok=True)
    return os.path.join(directory, prefix + os.path.basename(out_file))


def _write_error_marker(out_file, prefix):
    """Create an empty file under ``error/`` whose name records the failure."""
    marker = _tagged_output_path(out_file, "error", prefix)
    # Written from an empty array: only the file name carries information.
    np.savetxt(marker, np.array([]))
    return marker


def get_vocal_mp3_void(mix_file, bg_file, lyric_file, out_file="out.mp3"):
    """Subtract a backing track from a mix and report the outcome as a card.

    On success the residual signal is written with ``write_arr_mp3`` into a
    ``perfect`` sub-directory next to ``out_file``. On every failure an empty
    marker file is written into an ``error`` sub-directory instead; the
    reason is encoded in the marker's file-name prefix.

    Args:
        mix_file: Path of the audio file containing vocal plus accompaniment.
        bg_file: Path of the accompaniment-only audio file.
        lyric_file: Path of the lyric file handed to ``lyric_parser.Lyric``;
            it supplies the time before the vocal starts (compared against
            1000 and converted with ``ms2sample``).
        out_file: Path whose directory and base name are used to build the
            actual output / marker file names.

    Returns:
        An 11-element list ("card") laid out as::

            [clear, shift, volume_ratio,
             song_sr, bg_sr, result_sr,
             song_duration, bg_duration, result_duration, result_norm_ave,
             error_text]

        ``clear`` is 1 on success and 0 otherwise. The ``*_duration`` entries
        are array lengths (``len(...)``), not seconds. Fields that were not
        computed hold ``' '``.
    """
    # Load both recordings at their native sample rates.
    bg, sr_bg = librosa.load(bg_file, sr=None)
    mix, sr_mix = librosa.load(mix_file, sr=None)

    song_duration = len(mix)
    bg_duration = len(bg)

    if sr_mix != sr_bg:
        _write_error_marker(out_file, "sr_NOTmatch_")
        return [0, ' ', ' ',
                sr_mix, sr_bg, ' ',
                song_duration, bg_duration, ' ', ' ', 'sr not match']

    if sr_mix not in (44100, 22050):
        # Only these two rates were handled by the original branches; any
        # other rate is reported as an error (same marker prefix as above).
        _write_error_marker(out_file, "sr_NOTmatch_")
        return [0, ' ', ' ',
                sr_mix, sr_bg, ' ',
                song_duration, bg_duration, ' ', ' ',
                'get_vocal error of if(sr_mix != sr_bg) function']

    # Both supported rates currently use no shift (earlier experiments used
    # -1728 at 44100 Hz and -1728 / 2 at 22050 Hz).
    shift = 0

    sr = sr_mix
    sr_result = sr

    # Intro time: the time before the vocal starts according to the lyrics.
    lyric = lyric_parser.Lyric(lyric_file)
    intro = lyric.get_time_before_vocal()

    if intro < 1000:
        _write_error_marker(out_file, "lyric_wrongSTART_")
        card = [' '] * 11
        card[0] = 0
        card[10] = 'lyric might have wrong start (intro time < 1000)'
        return card

    # Convert milliseconds to samples (after taking 1000 ms off), then halve.
    # NOTE(review): if this evaluates to 0 the average below divides by
    # zero -- confirm whether an intro of exactly 1000 ms can occur.
    window = ms2sample(intro - 1000, sr) // 2

    # Mute the start of both signals to avoid a blast sound.
    mix = mute_start(mix)
    bg = mute_start(bg)

    # Volume ratio between the signals and the norm of the accompaniment.
    vol, bg_norm = compute_vol_ratio_bgNorm(mix, bg, window)

    # Pad both signals to the same length and apply the shift.
    mix, bg = npp.pad_the_same(mix, bg)
    mix_shift = npp.right_shift(mix, shift)

    # De-vocal: scaled mix minus accompaniment, limited to the mix length.
    result = mix_shift[:song_duration] * vol - bg[:song_duration]
    # Drop the last 3000 samples to avoid a blast at the end (1728 + 1000).
    result = result[:len(result) - 3000]
    result_duration = len(result)

    # Decide whether the subtraction is "clear": compare the L1 norm of the
    # residual over the intro window with the accompaniment norm.
    result_norm = linalg.norm(result[:window], ord=1)
    result_norm_ave = int(result_norm / window * 10000000)

    if result_norm < bg_norm:
        fn = _tagged_output_path(out_file, "perfect",
                                 str(result_norm_ave) + "_")

        # TODO : ouput result mp3
        write_arr_mp3(fn, result, sr)
        print(
            "   result_norm < bg_norm * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * ! ! ! de success ! ! !",
            end=" ")
        return [1, shift, vol,
                sr_mix, sr_bg, sr_result,
                song_duration, bg_duration, result_duration, result_norm_ave,
                ' ']

    # Failure: leave an empty marker whose name carries the norm value.
    fn = _write_error_marker(out_file,
                             "norm_" + str(result_norm_ave) + "_")
    print("   " + fn)
    print(
        "   result_norm > bg_norm - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ! ! ! de fail ! ! !",
        end=" ")
    return [0, shift, vol,
            sr_mix, sr_bg, sr_result,
            song_duration, bg_duration, result_duration, result_norm_ave,
            'de fail']