def speaker_diarization(wav_path='/home/daiab/machine_disk/data/voice_identity/dianxin/1.wav',
                        num_speaker=2, use_LDA=False, plot=True):
    """Split a recording into one WAV file per diarized speaker.

    Runs ``aS.speaker_diarization`` on ``wav_path``, gathers the samples
    attributed to each speaker and writes them to ``./0.wav``, ``./1.wav``,
    ... in the current working directory.

    Args:
        wav_path: Path of the audio file to diarize.
        num_speaker: Number of speakers requested from the diarizer; also the
            number of output files written.
        use_LDA: When true, call the diarizer with explicit window settings
            and its default ``lda_dim``; otherwise pass ``lda_dim=0``.
        plot: Forwarded to the diarizer as its ``plot`` argument.

    Returns:
        None. The results are the WAV files written to disk.
    """
    if use_LDA:
        pos, cls = aS.speaker_diarization(wav_path, num_speaker,
                                          mt_size=4.0, mt_step=0.1,
                                          st_win=0.05, st_step=0.01,
                                          plot=plot)
    else:
        pos, cls = aS.speaker_diarization(wav_path, num_speaker,
                                          lda_dim=0, plot=plot)

    fr, x = audio_basic_io.read_audio_file(wav_path)

    # Trim 0.01% of the total length from both ends of every segment so that
    # samples right at a speaker change are left out.
    cut_num = int(x.shape[0] * 0.0001)
    print('cut_num', cut_num)

    # NOTE(review): assumes pos[i] is the end of segment i in seconds, cls[i]
    # is its speaker index and fr is the sample rate -- confirm against the
    # diarizer and reader in use.
    chunks = [[] for _ in range(num_speaker)]
    pre_pos = 0
    for seg_end, label in zip(pos, cls):
        v_to = int(seg_end * fr)
        start = pre_pos + cut_num
        # Clamp the stop index: for a segment shorter than the trim margin,
        # ``v_to - cut_num`` can be negative, and a negative slice bound would
        # wrap around and pull in almost the whole recording.
        stop = max(v_to - cut_num, start)
        chunks[int(label)].append(x[start:stop])
        pre_pos = v_to

    voices = []
    for parts in chunks:
        if parts:
            voices.append(np.concatenate(parts).astype(np.int16))
        else:
            # A speaker with no segments still gets an (empty) output file.
            voices.append(np.array([], dtype=np.int16))

    print(*(len(voice) for voice in voices))
    for idx, voice in enumerate(voices):
        wavfile.write('./%d.wav' % idx, fr, voice)
def speakerDiarizationWrapper(inputFile, numSpeakers, useLDA):
    """Run ``aS.speaker_diarization`` on ``inputFile`` with ``plot_res=True``.

    Args:
        inputFile: Forwarded to the diarizer as its first argument.
        numSpeakers: Forwarded to the diarizer as its second argument.
        useLDA: When truthy the diarizer's own ``lda_dim`` default is used;
            otherwise ``lda_dim=0`` is passed explicitly.
    """
    # Only the non-LDA path overrides lda_dim; everything else is shared.
    lda_override = {} if useLDA else {'lda_dim': 0}
    aS.speaker_diarization(inputFile, numSpeakers, **lda_override, plot_res=True)
def _collapse_runs(classes, timestamps):
    """Merge consecutive rows sharing a class into ``[class, first_ts, last_ts]``."""
    from itertools import groupby
    from operator import itemgetter

    runs = []
    for label, rows in groupby(zip(classes, timestamps), key=itemgetter(0)):
        stamps = [stamp for _, stamp in rows]
        # groupby also yields the final run, which must not be dropped.
        runs.append([label, stamps[0], stamps[-1]])
    return runs


def dia(filename, speakers, lda_dim=0):
    """Diarize ``filename`` and export one WAV file per detected speaker.

    Steps:
      1. Call ``main_file`` on the whole recording (``overall=True``).
      2. Run ``speaker_diarization`` and write its result to
         ``Speaker_Diarization.txt`` as ``Timestamp,Classes`` rows.
      3. Collapse consecutive rows of the same class into time spans and
         group the spans per class.
      4. For every class with at least one span, concatenate the matching
         audio, export it to ``<stem>_speaker_<class>.wav`` and call
         ``main_file`` on the exported file.

    Args:
        filename: Path of the WAV file to process.
        speakers: Passed to ``speaker_diarization`` as ``n_speakers``.
        lda_dim: Passed through to ``speaker_diarization``.

    Side effects:
        Rebinds the module-level ``labels`` to the classes found, with any
        class that has no spans removed. Overwrites
        ``Speaker_Diarization.txt`` in the current working directory.
    """
    global labels
    import os

    # Truncate the results file before any processing, as the original did;
    # the rows are appended only after diarization has finished.
    with open('Speaker_Diarization.txt', 'w'):
        pass

    main_file(filename, overall=True)
    # NOTE(review): expects a (timestamps, classes) pair of equal length --
    # confirm against the speaker_diarization implementation in scope.
    timestamp, classes = speaker_diarization(filename=filename,
                                             n_speakers=speakers,
                                             lda_dim=lda_dim)

    with open('Speaker_Diarization.txt', 'a') as out:
        out.write('Timestamp,Classes\n')
        for stamp, cls in zip(timestamp, classes):
            out.write(f'{stamp},{int(cls)}\n')

    df = pd.read_csv('Speaker_Diarization.txt')
    labels = df['Classes'].unique()

    segments = _collapse_runs(df['Classes'].tolist(), df['Timestamp'].tolist())
    # Map each class to its list of [start, end] spans.
    sda = {
        label: [[start, end] for cls, start, end in segments if cls == label]
        for label in labels
    }

    speech = AudioSegment.from_wav(filename)
    # splitext strips only the extension, so dots elsewhere in the path
    # (e.g. './clip.wav') do not truncate the output name.
    stem = os.path.splitext(filename)[0]
    for key, value in sda.items():
        # Timestamps are multiplied by 1000 before slicing the audio;
        # sum() starts from 0, so this stays 0 when there are no spans.
        speaker = sum(speech[start * 1000:end * 1000] for start, end in value)
        file = f'{stem}_speaker_{key}.wav'
        print(speaker)
        print(key, value)
        if not value:
            labels = list(labels)
            labels.remove(key)
        else:
            speaker.export(file, format='wav')
            print('Files Exported')
            main_file(file=file, folder='uploads', labels=labels)