def get_duration_from_file(sound_metadata, entry):
    """Return the duration (in seconds) for *entry*.

    Prefers a non-zero duration cached in the entry itself; otherwise
    resolves the entry's filename and probes the sound file on disk via
    the audio backend.
    """
    duration = entry.get('duration')
    # A single `!= 0` test suffices: 0 == 0.0 in Python, so the original
    # extra `!= 0.0` comparison was redundant.
    if duration is not None and duration != 0:
        return duration
    filename = entry['filename']
    # Entries flagged 'NoFilename' are stored under their hex sound id.
    if 'NoFilename' in entry['flags']:
        filename = "%04x.wav" % entry['sound_id']
    return audio.get_duration(
        os.path.join(params['sound_folder'], filename))
def split_on_silence_with_librosa(
        audio_path, top_db=40, frame_length=1024, hop_length=256,
        skip_idx=0, out_ext="wav",
        min_segment_length=3, max_segment_length=8,
        pre_silence_length=0, post_silence_length=0):
    """Split an audio file on silence and save the kept segments.

    First pass removes breath noise from every voiced region and writes a
    "*_no_breath" copy of the whole file; second pass re-splits the cleaned
    audio and writes each segment whose duration lies strictly between
    ``min_segment_length`` and ``max_segment_length`` (seconds), padded
    with ``pre_silence_length``/``post_silence_length`` of silence.

    Returns the list of output paths of the saved segments.
    """
    # Base name without any extension ("foo.bar.wav" -> "foo").
    filename = os.path.basename(audio_path).split('.', 1)[0]
    # NOTE: removed the unused local `in_ext = audio_path.rsplit(".")[1]`;
    # besides being dead code, rsplit() without a maxsplit made it wrong
    # for paths containing more than one dot.
    audio = load_audio(audio_path)

    # Pass 1: locate voiced regions and strip breath noise from each.
    edges = librosa.effects.split(
        audio, top_db=top_db,
        frame_length=frame_length, hop_length=hop_length)
    new_audio = np.zeros_like(audio)
    for idx, (start, end) in enumerate(edges[skip_idx:]):
        new_audio[start:end] = remove_breath(audio[start:end])
    save_audio(new_audio, add_postfix(audio_path, "no_breath"))
    audio = new_audio

    # Pass 2: re-split the cleaned audio and save qualifying segments.
    edges = librosa.effects.split(
        audio, top_db=top_db,
        frame_length=frame_length, hop_length=hop_length)
    audio_paths = []
    for idx, (start, end) in enumerate(edges[skip_idx:]):
        segment = audio[start:end]
        duration = get_duration(segment)
        # Discard segments outside the (min, max) duration window.
        if duration <= min_segment_length or duration >= max_segment_length:
            continue
        output_path = "{}/{}.{:04d}.{}".format(
            os.path.dirname(audio_path), filename, idx, out_ext)
        # Pad with leading/trailing silence before saving.
        padded_segment = np.concatenate([
            get_silence(pre_silence_length),
            segment,
            get_silence(post_silence_length),
        ])
        save_audio(padded_segment, output_path)
        audio_paths.append(output_path)
    return audio_paths
def SetAudioEventWithMDX(self):
    """Reset the cue's audio fields to their defaults, attach MDX data for
    the cued path, and refresh timing/offsets and the list item display.

    Does nothing unless the audio control has actually loaded the cue's path.
    """
    if self.AudioControl.path != self.cue.path:
        return
    self.pending.mdx = False
    # Baseline cue state before attaching freshly computed MDX data.
    cue_defaults = {
        'channel': 2,
        'tempo': 0.0,
        'autogain': 0.4,
        'waveform': None,
        'key': '-',
        'highlight': None,
        'highlight_offset': None,
        'highlight_variable': None,
        'fffr_static': 15.0,
        'fffr_variable': 15.0,
    }
    for attr_name, default_value in cue_defaults.items():
        setattr(self.cue, attr_name, default_value)
    self.cue.mdx = self.GetMDX(self.cue.path)
    stream_length = audio.get_duration(self.cue.hStream)
    self.cue.duration = stream_length
    self.SetTrackOffsetTime(0.0)
    self.SetTrackFinishTime(stream_length)
    self.cue.item = MakeMusicFileItem(self.cue.path, 0, self.item_column)
    self.DirectDraw()
def text_recognition(path, config):
    """Transcribe *path* with Google Cloud Speech (Korean) and cache the
    result next to the audio file as ``<root>.txt``.

    Returns a ``{path: transcript}`` dict; returns ``{}`` when the clip
    exceeds ``config.max_duration`` (minus the silence padding).
    """
    root, ext = os.path.splitext(path)
    txt_path = root + ".txt"
    # Serve a cached transcription if one already exists.
    if os.path.exists(txt_path):
        # Read through the managed handle; the original code opened the
        # file a second time inside json.loads and leaked that handle.
        with open(txt_path) as f:
            return json.loads(f.read())

    from google.cloud import speech
    from google.cloud.speech import enums
    from google.cloud.speech import types

    out = {}
    error_count = 0
    tmp_path = os.path.splitext(path)[0] + ".tmp.wav"
    while True:
        try:
            client = speech.SpeechClient()
            content = load_audio(
                path,
                pre_silence_length=config.pre_silence_length,
                post_silence_length=config.post_silence_length)
            max_duration = config.max_duration - \
                config.pre_silence_length - config.post_silence_length
            audio_duration = get_duration(content)
            if audio_duration >= max_duration:
                print(" [!] Skip {} because of duration: {} > {}". \
                        format(path, audio_duration, max_duration))
                return {}
            content = resample_audio(content, config.sample_rate)
            save_audio(content, tmp_path, config.sample_rate)
            with io.open(tmp_path, 'rb') as f:
                audio = types.RecognitionAudio(content=f.read())
            # Use a distinct name here: the original rebound `config`,
            # which broke every `config.*` lookup on a retry iteration.
            recognition_config = types.RecognitionConfig(
                encoding=enums.RecognitionConfig.AudioEncoding.LINEAR16,
                sample_rate_hertz=config.sample_rate,
                language_code='ko-KR')
            response = client.recognize(recognition_config, audio)
            if len(response.results) > 0:
                alternatives = response.results[0].alternatives
                results = [
                    alternative.transcript for alternative in alternatives
                ]
                assert len(results) == 1, "More than 1 results: {}".format(
                    results)
                # The assert above guarantees exactly one result, so the
                # original `"" if len(results) == 0` branch was dead code.
                out = {path: results[0]}
                print(path, results[0])
            break
        except Exception as err:
            # The original code raised immediately here, which made all
            # of the retry bookkeeping below unreachable; retry up to 5
            # times as evidently intended, then give up (best-effort).
            error_count += 1
            print("Skip warning for {} for {} times". \
                    format(path, error_count))
            if error_count > 5:
                break
            else:
                continue
    remove_file(tmp_path)
    with open(txt_path, 'w') as f:
        json.dump(out, f, indent=2, ensure_ascii=False)
    return out
def test_getduration_normal():
    """A known MP3 fixture reports its expected length of 18 seconds."""
    expected_seconds = 18
    measured = audio.get_duration('pipeau-Defakator.mp3')
    assert measured == expected_seconds
def test_getduration_not_a_mp3_file():
    """get_duration returns 0 for a file that is not valid audio."""
    # Write the dummy file directly instead of shelling out via
    # os.system('echo ...'): portable (Windows cmd would keep the quotes
    # in the output) and avoids an unchecked shell invocation.
    with open('musiquetest.txt', 'w') as f:
        f.write('000\n')
    assert audio.get_duration('musiquetest.txt') == 0
def test_getduration_no_file():
    """A nonexistent path yields a duration of 0 rather than an error."""
    missing_path = 'NoFileIsNamedLikeThis'
    assert audio.get_duration(missing_path) == 0
def SetDurationTime(self, duration=None):
    """Record the cue's duration, probing the stream when none is given."""
    # Only consult the audio backend when the caller did not supply an
    # explicit value (0 / 0.0 are legitimate explicit durations).
    resolved = (audio.get_duration(self.cue.hStream)
                if duration is None else duration)
    self.cue.duration = resolved
def GetDurationTime(self):
    """Return the live stream duration, falling back to the cached value.

    The audio backend signals failure with -1.0; in that case the
    duration previously stored on the cue is returned instead.
    """
    probed = audio.get_duration(self.cue.hStream)
    return self.cue.duration if probed == -1.0 else probed
def InitAudio(self):
    """(Re)create the BASS stream for the currently cued track, attach the
    LoudMax VST limiter DSP to it, and start playback — honouring any
    pending resume position and the fade-in/volume state.
    """
    # NOTE(review): a large commented-out ctypes prototype block for
    # loading bass_vst.dll by hand (BASS_VST_ChannelSetDSP / GetParam /
    # SetParam / EmbedEditor / SetScope / GetInfo signatures) lived here;
    # it is superseded by the pybass BASS_VST_* calls below.
    self.parent.parent.ListBox.List.pending.SkipStopIcon = True
    # Re-cueing the same path means the playback position is already set.
    if self.path == self.parent.cue.path:
        is_position_set = True
    else:
        is_position_set = False
    self.path = self.parent.cue.path
    # Free any stream that is still active before creating a new one.
    if pybass.BASS_ChannelIsActive(self.hStream) == 1:
        pybass.BASS_StreamFree(self.hStream)
    if sys.platform.startswith('win'):
        flags = pybass.BASS_STREAM_PRESCAN | pybass.BASS_UNICODE
    elif sys.platform.startswith('darwin'):
        flags = pybass.BASS_STREAM_PRESCAN
        # presumably BASS on macOS wants a byte-encoded (non-unicode)
        # path, hence the filesystem-encoding conversion — TODO confirm.
        self.path = self.path.encode(sys.getfilesystemencoding())
    self.hStream = pybass.BASS_StreamCreateFile(False, self.path, 0, 0,
                                                flags)
    # Pick the VST limiter DLL; the second assignment deliberately makes
    # the Lite build win over the full build.
    vst_plugin_name = 'LoudMax64.dll'
    vst_plugin_name = 'LoudMaxLite64.dll'
    vst_plugin_path = os.path.join('assets', 'dlls', vst_plugin_name)
    # Under a PyInstaller bundle the assets are unpacked below _MEIPASS;
    # otherwise resolve relative to the package root (two dirs up).
    if hasattr(sys, '_MEIPASS'):
        vst_plugin_path = os.path.join(sys._MEIPASS, vst_plugin_path)
    else:
        vst_plugin_path = os.path.join(
            os.path.dirname(os.path.dirname(__file__)), vst_plugin_path)
    # BASS_VST_KEEP_CHANS = 0x00000001
    flags = pybass.BASS_UNICODE | pybass.BASS_VST_KEEP_CHANS
    self.vstHandle = pybass.BASS_VST_ChannelSetDSP(self.hStream,
                                                   vst_plugin_path,
                                                   flags, 0)
    # Initialise the limiter's parameters; indices are plugin-specific
    # (presumably LoudMax threshold/output/link/ISP — TODO confirm).
    pybass.BASS_VST_SetParam(self.vstHandle, 0, 0.0)
    pybass.BASS_VST_SetParam(self.vstHandle, 1, 1.0)
    pybass.BASS_VST_SetParam(self.vstHandle, 2, 0.0)
    pybass.BASS_VST_SetParam(self.vstHandle, 3, 0.0)
    # NOTE(review): further commented-out experiments (embedding the VST
    # editor in a wx dialog, probing parameters via BASS_VST_GetParam)
    # were removed here.
    self.parent.cue.hStream = self.hStream
    # Start silent; volume is brought up by the fade-in handling below.
    audio.set_volume(self.hStream, 0.0)
    if self.resume is not None:
        resume = self.resume
        # A negative resume value counts back from the end of the track.
        if self.resume < 0:
            duration = audio.get_duration(self.hStream)
            resume = duration + self.resume
        audio.set_position(self.hStream, resume)
    pybass.BASS_ChannelPlay(self.hStream, False)
    self.fadein.cnt = self.fadein.time
    # When looping onto a fresh position, keep the fade-in counter armed;
    # otherwise restore the user's volume immediately.
    if is_position_set is False and self.parent.IsLoopOn():
        self.fadein.cnt = self.fadein.time
    else:
        self.parent.SetVolume()
    self.resume = None
    self.pending = False
    # self.parent.FocusPlayingItem()
    # Force both list views to redraw with the new playback state.
    self.parent.parent.ListTab.reInitBuffer = True
    self.parent.parent.ListBox.List.reInitBuffer = True