示例#1
0
        def get_duration_from_file(sound_metadata, entry):
            """Return the duration for ``entry``, measuring the file if needed.

            A non-zero ``entry['duration']`` is returned unchanged. Otherwise
            the file name comes from ``entry['filename']`` -- or is derived
            from ``entry['sound_id']`` when ``'NoFilename'`` is listed in
            ``entry['flags']`` -- and ``audio.get_duration`` is called on that
            file inside ``params['sound_folder']``.

            ``sound_metadata`` is accepted but not used here.
            """
            stored = entry['duration'] if 'duration' in entry else 0
            if stored != 0:
                return stored

            # Always read the declared file name first, then override it for
            # entries flagged as having no file name of their own.
            named = entry['filename']
            has_no_name = 'NoFilename' in entry['flags']
            wav_name = "%04x.wav" % entry['sound_id'] if has_no_name else named

            return audio.get_duration(
                os.path.join(params['sound_folder'], wav_name))
def split_on_silence_with_librosa(audio_path,
                                  top_db=40,
                                  frame_length=1024,
                                  hop_length=256,
                                  skip_idx=0,
                                  out_ext="wav",
                                  min_segment_length=3,
                                  max_segment_length=8,
                                  pre_silence_length=0,
                                  post_silence_length=0):
    """Split an audio file on silence and save each kept segment to disk.

    The audio is loaded, split into non-silent intervals with
    ``librosa.effects.split``, and every interval (from ``skip_idx`` on) is
    passed through ``remove_breath``. The processed signal is saved next to
    the input via ``add_postfix(audio_path, "no_breath")`` and then split
    again. Each resulting segment whose ``get_duration`` lies strictly
    between ``min_segment_length`` and ``max_segment_length`` is padded with
    ``get_silence(pre_silence_length)`` / ``get_silence(post_silence_length)``
    and written as ``<dir>/<name>.<idx:04d>.<out_ext>``.

    Args:
        audio_path: Path of the audio file to split.
        top_db, frame_length, hop_length: Forwarded to
            ``librosa.effects.split``.
        skip_idx: Number of leading intervals to ignore.
        out_ext: File extension used for the saved segments.
        min_segment_length, max_segment_length: Exclusive bounds on the
            segment duration as reported by ``get_duration``
            (presumably seconds -- confirm against ``get_duration``).
        pre_silence_length, post_silence_length: Passed to ``get_silence``
            to build the padding placed before / after each segment.

    Returns:
        List of paths of the segment files that were written.
    """
    # Name up to the first dot, so "a.b.wav" yields "a".
    filename = os.path.basename(audio_path).split('.', 1)[0]
    out_dir = os.path.dirname(audio_path)

    audio = load_audio(audio_path)

    edges = librosa.effects.split(audio,
                                  top_db=top_db,
                                  frame_length=frame_length,
                                  hop_length=hop_length)

    # Everything outside the detected intervals stays zero (silent).
    new_audio = np.zeros_like(audio)
    for start, end in edges[skip_idx:]:
        new_audio[start:end] = remove_breath(audio[start:end])

    save_audio(new_audio, add_postfix(audio_path, "no_breath"))
    audio = new_audio
    # Re-detect the intervals on the processed signal.
    edges = librosa.effects.split(audio,
                                  top_db=top_db,
                                  frame_length=frame_length,
                                  hop_length=hop_length)

    audio_paths = []
    for idx, (start, end) in enumerate(edges[skip_idx:]):
        segment = audio[start:end]
        duration = get_duration(segment)

        if duration <= min_segment_length or duration >= max_segment_length:
            continue

        # os.path.join keeps the output beside the input even when
        # audio_path has no directory part ("{}/{}" would produce "/name").
        output_path = os.path.join(
            out_dir, "{}.{:04d}.{}".format(filename, idx, out_ext))

        padded_segment = np.concatenate([
            get_silence(pre_silence_length),
            segment,
            get_silence(post_silence_length),
        ])

        save_audio(padded_segment, output_path)
        audio_paths.append(output_path)

    return audio_paths
示例#3
0
 def SetAudioEventWithMDX(self):
     """Reset the current cue's analysis fields and reload its MDX data.

     Does nothing unless ``self.AudioControl.path`` equals
     ``self.cue.path``. Otherwise the pending MDX flag is cleared, the cue
     attributes are reset to fixed defaults, ``self.cue.mdx`` is refreshed
     via ``self.GetMDX``, the duration is re-read from the cue's stream, the
     track offset/finish times are set to span the whole duration, the list
     item is rebuilt and the view is redrawn.
     """
     if self.AudioControl.path != self.cue.path:
         return
     self.pending.mdx = False

     # Applied in this exact order, one attribute at a time.
     cue_defaults = (
         ('channel', 2),
         ('tempo', 0.0),
         ('autogain', 0.4),
         ('waveform', None),
         ('key', '-'),
         ('highlight', None),
         ('highlight_offset', None),
         ('highlight_variable', None),
         ('fffr_static', 15.0),
         ('fffr_variable', 15.0),
     )
     for attr_name, attr_value in cue_defaults:
         setattr(self.cue, attr_name, attr_value)

     self.cue.mdx = self.GetMDX(self.cue.path)

     track_length = audio.get_duration(self.cue.hStream)
     self.cue.duration = track_length
     self.SetTrackOffsetTime(0.0)
     self.SetTrackFinishTime(track_length)

     self.cue.item = MakeMusicFileItem(self.cue.path, 0, self.item_column)
     self.DirectDraw()
示例#4
0
def text_recognition(path, config):
    """Transcribe the audio file at ``path`` with Google Cloud Speech.

    Results are cached as JSON in ``<root>.txt`` beside the audio file. When
    that file already exists its parsed content is returned and no request
    is made.

    Args:
        path: Path of the audio file to transcribe.
        config: Object providing ``pre_silence_length``,
            ``post_silence_length``, ``max_duration`` and ``sample_rate``.

    Returns:
        ``{path: transcript}`` when the service returns a result, ``{}``
        when it returns none (both are written to the cache file), or ``{}``
        without touching the cache when the audio is too long.

    Raises:
        Exception: Wraps any error raised while loading, converting or
            recognizing the audio (message prefixed with ``"OS error: "``).
    """
    root = os.path.splitext(path)[0]
    txt_path = root + ".txt"

    if os.path.exists(txt_path):
        # Read through the managed handle so no file object is leaked.
        with open(txt_path) as f:
            return json.load(f)

    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    out = {}
    tmp_path = root + ".tmp.wav"

    try:
        client = speech.SpeechClient()

        content = load_audio(
            path,
            pre_silence_length=config.pre_silence_length,
            post_silence_length=config.post_silence_length)

        max_duration = config.max_duration - \
                config.pre_silence_length - config.post_silence_length
        audio_duration = get_duration(content)

        if audio_duration >= max_duration:
            print(" [!] Skip {} because of duration: {} > {}". \
                    format(path, audio_duration, max_duration))
            return {}

        content = resample_audio(content, config.sample_rate)
        save_audio(content, tmp_path, config.sample_rate)

        with io.open(tmp_path, 'rb') as f:
            audio = types.RecognitionAudio(content=f.read())

        # Kept in its own name so the ``config`` argument is not shadowed.
        recognition_config = types.RecognitionConfig(
            encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=config.sample_rate,
            language_code='ko-KR')

        response = client.recognize(recognition_config, audio)
        if len(response.results) > 0:
            alternatives = response.results[0].alternatives

            results = [
                alternative.transcript for alternative in alternatives
            ]
            assert len(results) == 1, "More than 1 results: {}".format(
                results)

            out = {path: results[0]}
            print(path, results[0])
    except Exception as err:
        # Failures are surfaced to the caller; there is no retry.
        raise Exception("OS error: {0}".format(err))
    finally:
        # Remove the temporary WAV on every exit path, including errors.
        if os.path.exists(tmp_path):
            remove_file(tmp_path)

    with open(txt_path, 'w') as f:
        json.dump(out, f, indent=2, ensure_ascii=False)

    return out
示例#5
0
def test_getduration_normal():
    """``audio.get_duration`` reports 18 for the sample MP3 file."""
    measured = audio.get_duration('pipeau-Defakator.mp3')
    assert measured == 18
示例#6
0
def test_getduration_not_a_mp3_file():
    """``audio.get_duration`` reports 0 for a file that is not MP3 data."""
    # Write the fixture directly rather than through a shell command, so the
    # file content does not depend on the platform's ``echo`` and a failure
    # to create it raises instead of being silently ignored.
    with open('musiquetest.txt', 'w') as fixture:
        fixture.write('000\n')
    assert audio.get_duration('musiquetest.txt') == 0
示例#7
0
def test_getduration_no_file():
    """``audio.get_duration`` reports 0 for a path that does not exist."""
    measured = audio.get_duration('NoFileIsNamedLikeThis')
    assert measured == 0
示例#8
0
 def SetDurationTime(self, duration=None):
     """Store the cue duration.

     When ``duration`` is ``None`` the value is read from the cue's stream
     via ``audio.get_duration``; any other value is stored as given.
     """
     self.cue.duration = (audio.get_duration(self.cue.hStream)
                          if duration is None else duration)
示例#9
0
 def GetDurationTime(self):
     """Return the duration of the cue's stream.

     Falls back to the stored ``self.cue.duration`` when
     ``audio.get_duration`` returns ``-1.0``.
     """
     measured = audio.get_duration(self.cue.hStream)
     return self.cue.duration if measured == -1.0 else measured
示例#10
0
    def InitAudio(self):
        """Create and start a BASS stream for the parent's current cue.

        Steps, in order:

        1. Flag the list box so the stop icon update is skipped.
        2. Remember whether the cue path equals the previously loaded path,
           then adopt the cue path as ``self.path``.
        3. Free the previous stream if ``BASS_ChannelIsActive`` reports 1
           (presumably ``BASS_ACTIVE_PLAYING`` -- confirm in the BASS docs).
        4. Open the file as a stream, attach the LoudMaxLite VST plugin and
           set its parameters 0-3 to fixed values.
        5. Publish the stream handle on the cue, mute it, apply the pending
           resume position (a negative ``self.resume`` is an offset from the
           end of the track) and start playback.
        6. Arm the fade-in counter, restore the volume where required, clear
           the resume/pending state and request list redraws.
        """
        self.parent.parent.ListBox.List.pending.SkipStopIcon = True
        is_position_set = self.path == self.parent.cue.path
        self.path = self.parent.cue.path
        if pybass.BASS_ChannelIsActive(self.hStream) == 1:
            pybass.BASS_StreamFree(self.hStream)

        # Windows passes the path as-is with BASS_UNICODE; every other
        # platform gets a byte string in the file-system encoding. The
        # encoded form stays local so ``self.path`` remains comparable with
        # ``cue.path`` on later calls, and ``flags`` is bound on all platforms.
        stream_path = self.path
        if sys.platform.startswith('win'):
            flags = pybass.BASS_STREAM_PRESCAN | pybass.BASS_UNICODE
        else:
            flags = pybass.BASS_STREAM_PRESCAN
            stream_path = self.path.encode(sys.getfilesystemencoding())

        self.hStream = pybass.BASS_StreamCreateFile(False, stream_path, 0, 0,
                                                    flags)

        # Locate the bundled VST plugin: inside the PyInstaller extraction
        # directory when frozen, otherwise relative to this package's parent.
        vst_plugin_name = 'LoudMaxLite64.dll'
        vst_plugin_path = os.path.join('assets', 'dlls', vst_plugin_name)
        if hasattr(sys, '_MEIPASS'):
            vst_plugin_path = os.path.join(sys._MEIPASS, vst_plugin_path)
        else:
            vst_plugin_path = os.path.join(
                os.path.dirname(os.path.dirname(__file__)), vst_plugin_path)
        # BASS_VST_KEEP_CHANS = 0x00000001
        flags = pybass.BASS_UNICODE | pybass.BASS_VST_KEEP_CHANS
        self.vstHandle = pybass.BASS_VST_ChannelSetDSP(self.hStream,
                                                       vst_plugin_path, flags,
                                                       0)
        pybass.BASS_VST_SetParam(self.vstHandle, 0, 0.0)
        pybass.BASS_VST_SetParam(self.vstHandle, 1, 1.0)
        pybass.BASS_VST_SetParam(self.vstHandle, 2, 0.0)
        pybass.BASS_VST_SetParam(self.vstHandle, 3, 0.0)

        self.parent.cue.hStream = self.hStream
        # Start silent; the volume is restored below or by the fade-in.
        audio.set_volume(self.hStream, 0.0)
        if self.resume is not None:
            resume = self.resume
            if self.resume < 0:
                # Negative resume counts back from the end of the track.
                duration = audio.get_duration(self.hStream)
                resume = duration + self.resume
            audio.set_position(self.hStream, resume)
        pybass.BASS_ChannelPlay(self.hStream, False)

        self.fadein.cnt = self.fadein.time
        # The volume is left at zero only for a newly loaded track while
        # looping is on; in every other case it is restored immediately.
        if is_position_set or not self.parent.IsLoopOn():
            self.parent.SetVolume()
        self.resume = None
        self.pending = False
        self.parent.parent.ListTab.reInitBuffer = True
        self.parent.parent.ListBox.List.reInitBuffer = True