def myprocess(in_fname, out_fname):
    """Segment *in_fname* and write the result CSV to *out_fname*.

    Returns the string 'already done' if *out_fname* already exists
    (so reruns skip completed work), otherwise 0 on success.
    """
    if os.path.isfile(out_fname):
        return 'already done'
    dname = os.path.dirname(out_fname)
    # BUG FIX: dirname() returns '' for a bare filename, and os.makedirs('')
    # raises FileNotFoundError. Guard on dname, and use exist_ok=True to
    # close the isdir/makedirs race when several workers share an output dir.
    if dname:
        os.makedirs(dname, exist_ok=True)
    results = g(in_fname)  # g: segmenter callable defined elsewhere in this file
    seg2csv(results, out_fname)
    return 0
def _do_segmentation_(input_files, chunk_id, seg, odir):
    """Run segmentation on every media file in *input_files*.

    Writes one CSV per file under <odir>/seg_csv, splits each file into
    wavs under <odir>/seg_wav, and returns a dict mapping each file's
    basename to its wav-split segments. *chunk_id* is only used for logging.
    """
    t0 = time.time()
    csv_dir = os.path.join(odir, "seg_csv")
    wav_dir = os.path.join(odir, "seg_wav")
    segment_dict = {}
    for idx, media_path in enumerate(input_files):
        print('processing file %d-%d/%d: %s' % (chunk_id, idx + 1, len(input_files), media_path))
        media_path = media_path.strip()
        base, _ = os.path.splitext(os.path.basename(media_path))
        seg2csv(seg(media_path), '%s/%s.csv' % (csv_dir, base))
        segment_dict[base] = _do_wav_split_(media_path, os.path.join(csv_dir, base + ".csv"), wav_dir)
    elapsed_time = time.time() - t0
    print(format(elapsed_time) + ' seconds elapsed for ' + str(chunk_id))
    return segment_dict
from inaSpeechSegmenter import Segmenter, seg2csv
import time

# Media file to segment.
media = './VAD/s3_1.mp3'

# Constructing Segmenter loads the neural network into memory.
seg = Segmenter()

# Time the segmentation in whole milliseconds.
start_ms = int(round(time.time() * 1000))
segmentation = seg(media)
elapsed_ms = int(round(time.time() * 1000)) - start_ms
print("Time %d" % elapsed_ms)

seg2csv(segmentation, 'myseg.csv')
    help= "(default: 'true'). If set to 'true', segments detected as speech will be splitted into 'male' and 'female' segments. If set to 'false', segments corresponding to speech will be labelled as 'speech' (faster)" )
# (the parser.add_argument call closed above begins before this chunk)

args = parser.parse_args()

# Preprocess arguments and check their consistency
# Expand each -i argument as a glob pattern into the list of media files.
input_files = []
for e in args.input:
    input_files += glob.glob(e)
assert len(
    input_files
) > 0, 'No existing media selected for analysis! Bad values provided to -i (%s)' % args.input

odir = args.output_directory
assert os.access(odir, os.W_OK), 'Directory %s is not writable!' % odir

# Do processings
# Import is deferred until after argument validation because it is slow.
from inaSpeechSegmenter import Segmenter, seg2csv

# load neural network into memory, may last few seconds
# NOTE(review): distutils is deprecated and removed in Python 3.12 —
# consider replacing strtobool with a small local helper.
detect_gender = bool(distutils.util.strtobool(args.detect_gender))
seg = Segmenter(vad_engine=args.vad_engine, detect_gender=detect_gender)

# Segment every input file, writing <odir>/<basename>.csv for each.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for i, e in enumerate(input_files):
        print('processing file %d/%d: %s' % (i + 1, len(input_files), e))
        base, _ = os.path.splitext(os.path.basename(e))
        seg2csv(seg(e), '%s/%s.csv' % (odir, base))
from inaSpeechSegmenter import Segmenter, seg2csv

# Instantiate the segmenter ONCE: constructing Segmenter loads the neural
# network into memory (several seconds). The original built a second
# instance for the second file, paying that cost twice for no benefit.
seg = Segmenter()

# Segment the Chinese sample.
media = 'audio/original/cn.wav'
segmentation = seg(media)
seg2csv(segmentation, 'csv/cn_segment.csv')

# Segment the English sample with the same loaded model.
media = 'audio/original/en.wav'
segmentation = seg(media)
seg2csv(segmentation, 'csv/en_segment.csv')
def _segments_and_gender(filename, suffix):
    """Load (or compute and cache) the segmentation for one side of a call.

    Reads <data_directory><filename><suffix>.csv if it exists; otherwise
    segments m<filename><suffix>.wav, writes the CSV, and keeps the raw
    result. Returns (df, gender) where df rows are (label, start, end) and
    gender is 1 when there are no 'Male' rows or 'Female' rows outnumber
    'Male', else 0 (preserves the original code's convention — confirm
    the 0/1 meaning against the caller).
    """
    print('')
    print('Processing: ', 'm'+filename+suffix+'.wav')
    print('')
    csv_path = data_directory+filename+suffix+'.csv'
    if os.path.isfile(csv_path):
        df = pd.read_csv(csv_path, sep='\t', header=None)
    else:
        segmentation = seg(data_directory+'m'+filename+suffix+'.wav')
        seg2csv(segmentation, csv_path)
        df = pd.DataFrame(segmentation)
    # Count rows per label; compare Male vs Female row counts to pick gender.
    counts = df.groupby(0).count()
    if counts.loc[counts.index == 'Male'].shape[0] == 0:
        gender = 1
    elif counts.loc[counts.index == 'Female'].shape[0] == 0:
        gender = 0
    elif np.array(counts.loc[counts.index == 'Male'][1])[0] > np.array(counts.loc[counts.index == 'Female'][1])[0]:
        gender = 0
    else:
        gender = 1
    return df, gender


def _activity_seconds(df, n_seconds):
    """Expand numeric (code, start, end) rows into a per-second activity vector."""
    act = np.zeros(n_seconds)
    arr = df.values
    for i in range(arr.shape[0]):
        for s in range(int(arr[i, 1]), int(arr[i, 2])):
            act[s] = int(arr[i, 0])
    return act


def call_activity(filename):
    """Build activity timelines and speaker genders for both sides of a call.

    The '1'-suffixed files are one party (co) and the '0'-suffixed files the
    other (cu). Returns (df_co, df_cu, act_co, act_cu, gender_co, gender_cu).

    The original body duplicated the load/segment/gender logic verbatim for
    the two suffixes; it is factored into _segments_and_gender and
    _activity_seconds with identical behavior.
    """
    al = int(call_len(filename))
    df_co, gender_co = _segments_and_gender(filename, '1')
    df_cu, gender_cu = _segments_and_gender(filename, '0')
    # Map labels to numeric activity codes: speech (either gender) -> 2,
    # music -> 1, no activity -> 0.
    for df in (df_co, df_cu):
        df.replace('NOACTIVITY', 0, inplace=True)
        df.replace('Male', 2, inplace=True)
        df.replace('Female', 2, inplace=True)
        df.replace('Music', 1, inplace=True)
    act_co = _activity_seconds(df_co, al)
    act_cu = _activity_seconds(df_cu, al)
    return df_co, df_cu, act_co, act_cu, gender_co, gender_cu
# NOTE(review): everything between the triple quotes below is commented-out
# (dead) code; only this Audio-info print is live in the visible fragment.
print(Audio.frame_rate, Audio.channels, Audio.DEFAULT_CODECS, Audio.frame_width, Audio.channels, Audio.dBFS, Audio.max_dBFS)
'''
# Returns a list of tuples with segment information eg, ('Male',0.92,3)
print('Segmenting ...')
SegmentInfoTupleList= segmenter(Media)
print('segments done')
seg2csv(SegmentInfoTupleList, OutDirectory+ FileName[:len(FileName)-4]+ 'AudioSegmentation.csv')
# File to be segmented into pydub.audio_segment.AudioSegment
# Iterate over tuples in the list
for tupl in SegmentInfoTupleList[:]:
    if tupl[0] not in ('Male','Female', 'NOACTIVITY'):
        print('deleting')
        SegmentInfoTupleList.remove(tupl)
seg2csv(SegmentInfoTupleList, OutDirectory+ FileName[:len(FileName)-4]+ 'AudioSegmentation_Cleaned.csv')
'''
#Incase you want to know about your audio file
        # (continuation of the input-collection loop whose header is above this chunk:
        # literal paths are kept as-is, other arguments are expanded as globs)
        input_files += [e]
    else:
        input_files += glob.glob(e)

assert len(input_files) > 0, "No existing media selected for analysis! Bad values provided to -i ({})".format(args.input)

# Normalize the output directory: strip whitespace and any trailing slash.
odir = args.output_directory.strip(" \t\n\r").rstrip('/')
assert os.access(odir, os.W_OK), 'Directory %s is not writable!' % odir

# Do processings
from inaSpeechSegmenter import Segmenter, seg2csv, to_parse

# load neural network into memory, may last few seconds
seg = Segmenter()

# case of a file of files
files = to_parse(input_files)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    #print('processing file %d/%d: %s' % (i+1, len(input_files), e))
    # Derive one output CSV name per input file from its basename.
    base = [os.path.splitext(os.path.basename(e)) for e in files]
    base = [base[i][0] for i in range(len(base))]
    if len(odir) > 0:
        fout = ['%s/%s.csv' % (odir, elem) for elem in base]
    # NOTE(review): fout is only assigned when odir is non-empty — the call
    # below raises NameError for an empty odir; confirm odir is always set.
    seg2csv(seg(files, verbose=args.no_verbose), fout)
# Run the interval detection (this single call does it all).
segmentation = seg(input_file)

# segmentation is a list of tuples:
# ('interval label', interval start (s), interval end (s))
# print(segmentation)

# inaSpeechSegmenter does not write the split wavs itself, so we use
# pydub's AudioSegment for that.
# BUG FIX: load the source file ONCE instead of re-reading the whole wav
# on every iteration of the loop.
source_audio = AudioSegment.from_wav(input_file)

speech_segment_index = 0
for segment_label, seg_start, seg_end in segmentation:
    if segment_label == 'speech':  # speech interval
        # Convert interval bounds from seconds to milliseconds.
        start_time = seg_start * 1000
        end_time = seg_end * 1000
        # BUG FIX: the original rebound output_file each iteration
        # (output_file = output_file + ...), producing accumulated names
        # like 'out0.wav1.wav'. Build each name from the unchanged prefix.
        segment_path = output_file + str(speech_segment_index) + '.wav'
        source_audio[start_time:end_time].export(segment_path, format="wav")
        speech_segment_index += 1

# BUG FIX: the original f-string had empty braces (f"...{}..."), which is a
# SyntaxError. Use the segment count as the placeholder — TODO confirm the
# intended naming scheme with the author.
seg2csv(segmentation, f"./segment/myseg_{speech_segment_index}.csv")