示例#1
0
def _initialSegmentation(mfcc, wavFile, save_all, wdir):
    """
    Build the initial diarization of a recording with ``segmentation.init_seg``.

    :param mfcc: features forwarded unchanged to ``segmentation.init_seg``
    :param wavFile: audio file name; its extension is removed to obtain the
        show name
    :param save_all: when true, the diarization is also written to
        ``<wdir>/<show name>.i.seg``
    :param wdir: directory receiving the optional ``.i.seg`` file
    :return: the diarization returned by ``segmentation.init_seg``
    """
    # splitext removes whatever extension is present; slicing off the last
    # four characters only works for three-letter extensions such as ".wav".
    wavName = os.path.splitext(wavFile)[0]
    init_diar = segmentation.init_seg(mfcc, wavName)
    if save_all:
        init_filename = os.path.join(wdir, wavName + '.i.seg')
        Diar.write_seg(init_filename, init_diar)
    return init_diar
示例#2
0
def _linearBic(mfcc, seg_diar, thr_l, wavFile, wdir, save_all):
    """
    Run ``segmentation.bic_linear`` on *seg_diar* and optionally save the result.

    :param mfcc: features forwarded to ``segmentation.bic_linear``
    :param seg_diar: input diarization
    :param thr_l: threshold forwarded to ``segmentation.bic_linear``
    :param wavFile: audio file name; its extension is removed to build the
        output file name
    :param wdir: directory receiving the optional ``.l.seg`` file
    :param save_all: when true, the result is written to
        ``<wdir>/<show name>.l.seg``
    :return: the diarization returned by ``segmentation.bic_linear``
    """
    # splitext removes whatever extension is present; slicing off the last
    # four characters only works for three-letter extensions such as ".wav".
    wavName = os.path.splitext(wavFile)[0]
    bicl_diar = segmentation.bic_linear(mfcc, seg_diar, thr_l, sr=False)
    if save_all:
        bicl_filename = os.path.join(wdir, wavName + '.l.seg')
        Diar.write_seg(bicl_filename, bicl_diar)
    return bicl_diar
示例#3
0
def _gaussDiverSegmentation(mfcc, wavFile, init_diar, win_size, wdir, save_all):
    """
    Run ``segmentation.segmentation`` on *init_diar* and optionally save the
    result.

    :param mfcc: features forwarded to ``segmentation.segmentation``
    :param wavFile: audio file name; its extension is removed to build the
        output file name
    :param init_diar: input diarization
    :param win_size: window size forwarded to ``segmentation.segmentation``
    :param wdir: directory receiving the optional ``.s.seg`` file
    :param save_all: when true, the result is written to
        ``<wdir>/<show name>.s.seg``
    :return: the diarization returned by ``segmentation.segmentation``
    """
    # splitext removes whatever extension is present; slicing off the last
    # four characters only works for three-letter extensions such as ".wav".
    wavName = os.path.splitext(wavFile)[0]
    seg_diar = segmentation.segmentation(mfcc, init_diar, win_size)
    if save_all:
        seg_filename = os.path.join(wdir, wavName + '.s.seg')
        Diar.write_seg(seg_filename, seg_diar)
    return seg_diar
示例#4
0
def _viterbiDecode(mfcc, bich_diar, thr_vit, wavFile, wdir, save_all):
    """
    Run ``viterbi.viterbi_decoding`` on *bich_diar* and optionally save the
    result.

    :param mfcc: features forwarded to ``viterbi.viterbi_decoding``
    :param bich_diar: input diarization
    :param thr_vit: penalty forwarded to ``viterbi.viterbi_decoding``
    :param wavFile: audio file name; its extension is removed to build the
        output file name
    :param wdir: directory receiving the optional ``.d.seg`` file
    :param save_all: when true, the result is written to
        ``<wdir>/<show name>.d.seg``
    :return: the diarization returned by ``viterbi.viterbi_decoding``
    """
    # splitext removes whatever extension is present; slicing off the last
    # four characters only works for three-letter extensions such as ".wav".
    wavName = os.path.splitext(wavFile)[0]
    vit_diar = viterbi.viterbi_decoding(mfcc, bich_diar, thr_vit)
    if save_all:
        vit_filename = os.path.join(wdir, wavName + '.d.seg')
        Diar.write_seg(vit_filename, vit_diar)
    return vit_diar
示例#5
0
def _bicAhc(mfcc, bicl_diar, thr_h, wavFile, wdir, save_all):
    """
    Cluster *bicl_diar* with ``hac_bic.HAC_BIC`` and optionally save the result.

    :param mfcc: features forwarded to ``hac_bic.HAC_BIC``
    :param bicl_diar: input diarization
    :param thr_h: threshold forwarded to ``hac_bic.HAC_BIC``
    :param wavFile: audio file name; its extension is removed to build the
        output file name
    :param wdir: directory receiving the optional ``.h.seg`` file
    :param save_all: when true, the result is written to
        ``<wdir>/<show name>.h.seg``
    :return: the diarization returned by ``HAC_BIC.perform(to_the_end=True)``
    """
    # splitext removes whatever extension is present; slicing off the last
    # four characters only works for three-letter extensions such as ".wav".
    wavName = os.path.splitext(wavFile)[0]
    bic = hac_bic.HAC_BIC(mfcc, bicl_diar, thr_h, sr=False)
    bich_diar = bic.perform(to_the_end=True)
    if save_all:
        bichac_filename = os.path.join(wdir, wavName + '.h.seg')
        Diar.write_seg(bichac_filename, bich_diar)
    return bich_diar
示例#6
0
 def create_seg_viterbi(self, cep, segment_dir):
     """
     Apply Viterbi resegmentation to every segmentation file of *segment_dir*.

     Each file is read with ``Diar.read_seg``, decoded with
     ``viterbi.viterbi_decoding`` using ``self.vit_penalty``, and written to
     ``self.results_vit_dir`` as ``<file_name>.viterbi.<penalty>``.

     :param cep: features forwarded to ``viterbi.viterbi_decoding``
     :param segment_dir: directory whose entries are all read as
         segmentation files
     """
     for file_name in os.listdir(segment_dir):
         diar = Diar.read_seg(os.path.join(segment_dir, file_name))
         vit_diar = viterbi.viterbi_decoding(cep, diar, self.vit_penalty)
         # Label the output with the penalty actually used for decoding
         # rather than a hard-coded -250, so the file name stays truthful
         # when self.vit_penalty is configured differently.
         out_name = file_name + '.viterbi.{:.2f}'.format(self.vit_penalty)
         Diar.write_seg(os.path.join(self.results_vit_dir, out_name),
                        vit_diar)
def init_seg(cep, show='empty', cluster='init'):
    """
    Build a diarization holding one segment that covers every frame of *cep*.

    :param cep: numpy.ndarray of features; only ``cep.shape[0]`` is read
    :param show: value stored in the ``show`` field of the segment
    :param cluster: value stored in the ``cluster`` field of the segment
    :return: a Diar object containing that single segment
    """
    n_frames = cep.shape[0]
    whole_show = Diar()
    # One segment running from the first frame up to the frame count.
    whole_show.append(show=show, start=0, stop=n_frames, cluster=cluster)
    return whole_show
示例#8
0
 def create_seg_bic_hac(self, cep, segment_dir):
     """
     Run BIC hierarchical clustering on every segmentation file of
     *segment_dir*, once per threshold of a linear sweep.

     The thresholds are ``np.linspace(self.bic_hac_start, self.bic_hac_end,
     self.bic_hac_num)``. Each result is written to ``self.bic_hac_dir`` as
     ``<file_name>.bic_value.<threshold>``. An exception raised while
     handling a file is printed and the remaining thresholds of that file
     are skipped; processing continues with the next file.

     :param cep: features forwarded to ``hac_bic.HAC_BIC``
     :param segment_dir: directory whose entries are all read as
         segmentation files
     """
     for seg_name in os.listdir(segment_dir):
         try:
             source_diar = Diar.read_seg(os.path.join(segment_dir, seg_name))
             thresholds = np.linspace(self.bic_hac_start,
                                      self.bic_hac_end,
                                      self.bic_hac_num)
             for threshold in thresholds:
                 clustering = hac_bic.HAC_BIC(cep, source_diar, threshold,
                                              sr=False)
                 clustered_diar = clustering.perform(to_the_end=True)
                 out_name = seg_name + '.bic_value.{:.2f}'.format(threshold)
                 Diar.write_seg(os.path.join(self.bic_hac_dir, out_name),
                                clustered_diar)
         except Exception:
             # Best effort: report the failure and move on to the next file.
             traceback.print_exc()
示例#9
0
def hac_bic(feature_server, diar, threshold, square_root_bic=False):
    """
    Apply ``HAC_BIC`` clustering show by show and concatenate the results.

    :param feature_server: object whose ``load(show)`` returns a pair; the
        first element is used as the features of that show
    :param diar: diarization indexed by show through ``make_index(['show'])``
    :param threshold: passed to ``HAC_BIC`` as ``alpha``
    :param square_root_bic: passed to ``HAC_BIC`` as ``sr``
    :return: a Diar accumulating ``perform(to_the_end=True)`` of every show
    """
    per_show = diar.make_index(['show'])
    merged = Diar()
    for show_name in per_show:
        features, _ = feature_server.load(show_name)
        clustering = HAC_BIC(features,
                             per_show[show_name],
                             alpha=threshold,
                             sr=square_root_bic)
        merged += clustering.perform(to_the_end=True)
    return merged
示例#10
0
    def train(self):
        """
        Run the whole segmentation experiment.

        Initialisation reads ``self.input_seg``, packs it with ``pack(50)``,
        writes it to ``self.init_seg``, then computes the segmentation with
        ``segmentation.segmentation`` and writes it to ``self.gd_seg``.
        If any of that fails, the traceback and a message are printed and the
        method returns without running the later stages, because they all
        depend on the initial segmentation.

        The later stages run in this order: ``create_seg_bic_linear``,
        ``create_seg_bic_hac``, ``create_seg_iv_AHC``, ``create_seg_viterbi``.

        :return: None
        """
        try:
            init_diar = Diar.read_seg(self.input_seg)
            init_diar.pack(50)
            Diar.write_seg(self.init_seg, init_diar)
            gd_diar = segmentation.segmentation(self.cep, init_diar,
                                                self.win_size)
            Diar.write_seg(self.gd_seg, gd_diar)
        except Exception:
            # traceback has no print_exec(); the misspelled call raised an
            # AttributeError from inside this handler.
            traceback.print_exc()
            print("initialziation fault")
            # gd_diar is unbound at this point, so continuing would only
            # fail with a NameError in the first experiment stage.
            return

        # performing experiment
        self.create_seg_bic_linear(self.cep, gd_diar)
        self.create_seg_bic_hac(self.cep, self.linear_bic_dir)
        self.create_seg_iv_AHC(self.bic_hac_dir, self.input_show)
        self.create_seg_viterbi(self.cep, self.hac_iv_dir)
示例#11
0
 def create_seg_iv_AHC(self, segment_dir, input_show):
     """
     Run i-vector based hierarchical clustering on every segmentation file
     of *segment_dir*, once per threshold of a linear sweep.

     For each file, i-vectors are trained with ``self.train_ivectors``,
     scored with ``self.score_plda``, and ``hac_iv`` is run for each value of
     ``np.linspace(self.t_min, self.t_max, self.t_num)``. Each result is
     written to ``self.hac_iv_dir`` as ``<file_name>.hac_value.<threshold>``.
     An exception raised while handling a file is printed and processing
     continues with the next file.

     :param segment_dir: directory whose entries are all read as
         segmentation files
     :param input_show: not read by this method; ``self.input_show`` is the
         value passed to ``self.train_ivectors``
     """
     model_iv = ModelIV(self.model_fn)
     for seg_name in os.listdir(segment_dir):
         try:
             seg_path = os.path.join(segment_dir, seg_name)
             segment_diar = Diar.read_seg(seg_path)
             model = self.train_ivectors(model_iv, self.mfcc_dir, seg_name,
                                         segment_diar, self.input_show)
             scores = self.score_plda(model)
             thresholds = np.linspace(self.t_min, self.t_max, self.t_num)
             for threshold in thresholds:
                 diar_iv, _, _ = hac_iv(segment_diar,
                                        scores,
                                        threshold=threshold)
                 out_name = seg_name + '.hac_value.{:.2f}'.format(threshold)
                 Diar.write_seg(os.path.join(self.hac_iv_dir, out_name),
                                diar_iv)
         except Exception:
             # Best effort: report the failure and move on to the next file.
             traceback.print_exc()
             print("There is an error over here")
def _split_e(smooth, diarization, split_size):
    """
    Split every segment of *diarization* with ``_split_seg`` and gather the
    pieces in a new diarization.

    The actual cutting is delegated to ``_split_seg``, which receives the
    ``segments`` container of the output diarization to fill.

    :param smooth: sliding means of the energy (numpy.ndarray)
    :param diarization: a Diar object whose segments are to be split
    :param split_size: maximum size of a segment, forwarded to ``_split_seg``
    :return: a Diar object
    """
    pieces = Diar()
    for long_segment in diarization:
        # 250 is a fixed third argument of _split_seg; its meaning is
        # defined by that helper -- TODO confirm against _split_seg.
        _split_seg(smooth, long_segment, 250, split_size, pieces.segments)
    return pieces
def sanity_check(cep, show, cluster='init'):
    """
    Build a diarization from the places where consecutive frames of *cep*
    switch between "unchanged" and "changed".

    A frame transition counts as unchanged when the sum over all
    coefficients of ``cep[i + 1] - cep[i]`` is exactly zero. Boundaries are
    placed where that flag flips, and every other interval between
    boundaries, starting with the first one, becomes a segment.

    :param cep: numpy.ndarray of features, one frame per row
    :param show: value stored in the ``show`` field of each segment
    :param cluster: value stored in the ``cluster`` field of each segment
    :return: a Diar object
    """
    result = Diar()

    # One flag per pair of consecutive frames: True when the summed
    # difference of the pair is zero.
    frame_delta = np.diff(cep, axis=0)
    unchanged = np.sum(frame_delta, axis=1) == 0

    # A flip between neighbouring flags marks a boundary at index + 1.
    flips = unchanged[:-1] ^ unchanged[1:]
    boundaries = [0]
    boundaries.extend((np.arange(len(flips))[flips] + 1).tolist())
    boundaries.append(cep.shape[0])

    # Pair boundaries two by two: (b0, b1), (b2, b3), ...
    for seg_start, seg_stop in zip(boundaries[0::2], boundaries[1::2]):
        result.append(show=show, start=seg_start, stop=seg_stop,
                      cluster=cluster)

    return result
示例#14
0
    def decode(self, table):
        """
        Perform a Viterbi decoding over each segment of *table*.

        For every row, a forward pass accumulates scores into
        ``self.observation`` (modified in place) and records the best
        predecessor of each cluster in a back-pointer table. A backward pass
        then emits one-frame segments from ``stop`` down to ``start`` and
        merges consecutive frames that share the same cluster.

        :param table: a Diar object; each row provides ``'start'`` and
            ``'stop'``
        :return: a Diar object, sorted before being returned
        """

        # print(self.transition_probabilities)
        # print(self.observation)

        # Back-pointer table filled with -1; row 0 holds each cluster's own
        # index.
        path = numpy.ones((self.nb_features, self.nb_clusters), 'int32') * -1
        path[0, :] = numpy.arange(self.nb_clusters)
        out_diarization = Diar()

        for row in table:
            start = row['start']
            # Clamp to the last valid feature index.
            stop = min(row['stop'], self.nb_features - 1)
            logging.debug('perform from %d to %d', start, stop)

            # Forward pass: add the best previous score plus transition to
            # each cluster's score and remember which predecessor it was.
            # NOTE(review): when start == 0, t - 1 is -1 and selects the
            # last row of self.observation, and path[0, :] set above is
            # overwritten -- confirm this is intended.
            # assumes transition_probabilities is (nb_clusters, nb_clusters)
            # -- TODO confirm
            for t in range(start, stop + 1):
                tmp = self.observation[t -
                                       1, :] + self.transition_probabilities
                self.observation[t, :] += numpy.max(tmp, axis=1)
                path[t, :] = numpy.argmax(tmp, axis=1)

            # Backward pass: begin with the best-scoring cluster at `stop`.
            max_pos = numpy.argmax(self.observation[stop, :])
            out_diarization.append(show=self.show,
                                   start=stop - 1,
                                   stop=stop,
                                   cluster=self.cluster_list[max_pos])
            for t in range(stop - 1, start, -1):
                max_pos = path[t, max_pos]
                cluster = self.cluster_list[max_pos]
                # Extend the last emitted segment one frame to the left
                # when it is adjacent and has the same cluster; otherwise
                # open a new one-frame segment.
                if (out_diarization[-1]['start']
                        == t) and (out_diarization[-1]['cluster'] == cluster):
                    out_diarization[-1]['start'] -= 1
                else:
                    out_diarization.append(show=self.show,
                                           start=t - 1,
                                           stop=t,
                                           cluster=cluster)
        # Segments were appended back to front within each row.
        out_diarization.sort()
        # self.observation = None
        return out_diarization
def segmentation(cep, diarization, win_size=250):
    """
    Split the long segments of *diarization* with ``div_gauss`` and relabel
    every resulting segment.

    A segment whose duration exceeds ``2 * win_size`` is replaced by the
    segments returned by ``div_gauss``; any other segment is copied as is.
    The output segments are then given the clusters ``S0``, ``S1``, ... in
    iteration order.

    :param cep: features from which each segment extracts its own frames
    :param diarization: a Diar object to refine
    :param win_size: window size forwarded to ``div_gauss``
    :return: a Diar object
    """
    diarization_out = Diar()
    for segment in diarization:
        # Too short to hold two full windows: keep the segment untouched.
        if not segment.duration() > 2 * win_size:
            diarization_out.append_seg(segment)
            continue
        segment_features = segment.seg_features(cep)
        pieces = div_gauss(segment_features,
                           show=segment['show'],
                           win=win_size,
                           shift=segment['start'])
        diarization_out.append_diar(pieces)

    # Give each output segment its own cluster name.
    for index, segment in enumerate(diarization_out):
        segment['cluster'] = 'S' + str(index)

    return diarization_out
def div_gauss(cep, show='empty', win=250, shift=0):
    """
    Segmentation based on gaussian divergence.

    The segmentation detects the instantaneous change points corresponding to
    segment boundaries. The proposed algorithm is based on the detection of
    local maxima. It detects the change points through a gaussian divergence
    (see equation below), computed using Gaussians with diagonal covariance
    matrices. The left and right gaussians are estimated over a five-second
    window sliding along the whole signal (2.5 seconds for each gaussian,
    given *win* =250 features).
    A change point, i.e. a segment boundary, is present in the middle of the
    window when the gaussian divergence score reaches a local maximum.


        :math:`GD(s_l,s_r)=(\\mu_r-\\mu_l)^t\\Sigma_l^{-1/2}\\Sigma_r^{-1/2}(\\mu_r-\\mu_l)`

    where :math:`s_l` is the left segment modeled by the mean :math:`\\mu_l`
    and the diagonal covariance matrix :math:`\\Sigma_l`, :math:`s_r` is the
    right segment modeled by the mean :math:`\\mu_r` and the diagonal
    covariance matrix :math:`\\Sigma_r`.

    When *cep* is too short to hold two windows, no change point can be
    computed and a single segment covering all of *cep* is returned.

    :param cep: numpy array of frames
    :param show: value stored in the ``show`` field of every segment
    :param win: windows size in number of frames
    :param shift: offset added to every segment start and stop
    :return: a diarization object (s4d annotation)
    """

    length = cep.shape[0]
    # start and stop of the rolling windows A
    start_a = win - 1  # end of NAN
    stop_a = length - win
    # start and stop of the rolling windows B
    start_b = win + win - 1  # end of nan + delay
    stop_b = length

    # put features in a Pandas DataFrame
    df = pd.DataFrame(cep)
    # compute rolling mean and std in the window of size win, get numpy array
    # mean and std have NAN at the beginning of the output array
    r = df.rolling(window=win, center=False)
    mean = r.mean().values
    std = r.std().values

    # compute GD scores using 2 windows A and B
    dist = (np.square(mean[start_a:stop_a, :] - mean[start_b:stop_b, :]) / (
        std[start_a:stop_a, :] * std[start_b:stop_b, :])).sum(axis=1)

    if dist.size:
        # replace missing value to match cep size; np.pad is the public
        # entry point (np.lib.pad is not exposed by NumPy 2)
        dist_pad = np.pad(dist, (win - 1, win), 'constant',
                          constant_values=(dist[0], dist[-1]))
        # find local maximal at + or - win size
        borders = scipy.signal.argrelmax(dist_pad, order=win)[0].tolist()
    else:
        # fewer frames than two windows: no score, hence no inner border
        borders = []
    # append the first and last
    borders = [0] + borders + [length]

    diarization_out = Diar()
    for spk in range(len(borders) - 1):
        diarization_out.append(show=show, start=shift + borders[spk],
                               stop=shift + borders[spk + 1],
                               cluster='S' + str(spk))
    return diarization_out
示例#17
0
def pyAudioDiar():
    """
    Diarize the selected audio file in two ways and plot both results
    against the reference labelling.

    The file name, speaker count and reference label file are read from the
    ``labelFileNameSound``, ``labelNumberOfSpeakers`` and
    ``labelFileNameSegment`` objects through their ``get()`` methods.
    The function first runs ``aS.speaker_diarization``, then the
    segmentation / linear BIC / hierarchical BIC / Viterbi chain, and finally
    draws three stacked plots: the reference labels, the chain output and the
    ``aS.speaker_diarization`` output.

    :return: None
    """
    duration, result = aS.speaker_diarization(labelFileNameSound.get(),
                                              int(labelNumberOfSpeakers.get()),
                                              lda_dim=0,
                                              plot_res=False)
    show = 'diarizationExample'
    input_show = labelFileNameSound.get()
    input_sad = None  # set to a segmentation file path to start from it
    win_size = 250
    thr_l = 2
    thr_h = 3
    thr_vit = -250
    wdir = os.path.join('out', show)
    os.makedirs(wdir, exist_ok=True)
    fs = get_feature_server(input_show, feature_server_type='basic')
    cep, _ = fs.load(show)

    if input_sad is not None:
        init_diar = Diar.read_seg(input_sad)
        init_diar.pack(50)
    else:
        init_diar = segmentation.init_seg(cep, show)

    seg_diar = segmentation.segmentation(cep, init_diar, win_size)

    bicl_diar = segmentation.bic_linear(cep, seg_diar, thr_l, sr=False)

    bic = hac_bic.HAC_BIC(cep, bicl_diar, thr_h, sr=False)
    bich_diar = bic.perform(to_the_end=True)

    vit_diar = viterbi.viterbi_decoding(cep, bich_diar, thr_vit)

    # Expand the decoded segments into one speaker value per 20-unit step.
    # NOTE(review): row[3] + row[4] is used as the end position; confirm
    # whether field 4 is a length or already a stop index.
    resList = []
    currentPosition = 0
    for row in vit_diar:
        speakerValue = int(row[1][1:])
        while currentPosition < (row[3] + row[4]):
            resList.append(speakerValue)
            currentPosition += 20

    # Expand the reference labels into one speaker value per 0.2 step.
    currentPosition = 0
    realityList = []
    realityFile = pd.read_csv(labelFileNameSegment.get(),
                              delimiter='\t',
                              encoding='utf-8',
                              names=['start', 'end', 'speaker'])
    for index, row in realityFile.iterrows():
        speakerValue = int(row['speaker'][1:])
        while currentPosition < row['end']:
            realityList.append(int(speakerValue))
            currentPosition += 0.2

    # linspace yields exactly one x value per sample. arange with a float
    # step may produce one value too many, which makes plot() fail on
    # mismatched x/y lengths.
    plot.subplot(3, 1, 2)
    plot.title("s4d:")
    plot.plot(np.linspace(0, duration, len(resList), endpoint=False),
              resList, 'ro')
    plot.subplot(3, 1, 1)
    plot.title("Реальность:")
    plot.plot(np.linspace(0, duration, len(realityList), endpoint=False),
              realityList, 'bo')
    plot.subplot(3, 1, 3)
    plot.title("pyPlotAudio:")
    plot.plot(np.linspace(0, duration, len(result), endpoint=False),
              result, 'go')
    plot.show()
示例#18
0
# Settings and file locations for the PLDA stage.
plda_seg_fn = './data/seg/train.plda.seg'
rank_plda = 150
it_max_plda = 10
mfcc_plda_fn = './data/mfcc/norm_plda.h5'
plda_idmap_fn = './data/mfcc/plda_idmap.h5'
# rank_tv is defined earlier in the script, outside this excerpt.
plda_fn = './data/model/plda_'+str(rank_tv)+'_'+str(rank_plda)+'.h5'
# File locations for the normalisation data.
norm_stat_fn = './data/model/norm.stat.h5'
norm_fn = './data/model/norm.h5'
norm_iv_fn = './data/model/norm.iv.h5'


matrices_fn = './data/model/matrices.h5'
# The model file name embeds nb_gauss, rank_tv and rank_plda; nb_gauss is
# also defined outside this excerpt.
model_fn = './data/model/ester_model_{}_{}_{}.h5'.format(nb_gauss, rank_tv, rank_plda)

# Step 1: extract the UBM features listed in the UBM segmentation file and
# save the resulting id map. ubm_seg_fn, audio_dir, mfcc_ubm_fn and
# ubm_idmap_fn come from the part of the script before this excerpt.
logging.info('Computing MFCC for UBM')
diar_ubm = Diar.read_seg(ubm_seg_fn, normalize_cluster=True)
fe = get_feature_extractor(audio_dir, 'sid')
ubm_idmap = fe.save_multispeakers(diar_ubm.id_map(), output_feature_filename=mfcc_ubm_fn, keep_all=False)
ubm_idmap.write_txt(ubm_idmap_fn)

# Step 2: reopen the features just written through a feature server.
fs = get_feature_server(mfcc_ubm_fn, 'sid')

# Step 3: train the UBM mixture with EM_split on the right-hand ids of the
# id map, then write it to ubm_fn under the 'ubm/' prefix.
spk_lst = ubm_idmap.rightids
ubm = Mixture()
ubm.EM_split(fs, spk_lst, nb_gauss,
             iterations=(1, 2, 2, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8), num_thread=num_thread,
             llk_gain=0.01)
ubm.write(ubm_fn, prefix='ubm/')

# Step 4: read the TV segmentation; tv_seg_fn is defined outside this
# excerpt.
logging.info('Computing MFCC for TV')
diar_tv = Diar.read_seg(tv_seg_fn, normalize_cluster=True)
示例#19
0
 def create_seg_bic_linear(self, cep, diar):
     """
     Run ``segmentation.bic_linear`` on *diar* once per threshold of a
     linear sweep and write each result to disk.

     The thresholds are ``np.linspace(self.li_bic_p_start,
     self.li_bic_p_stop, self.li_bic_p_num)``. Each output path is obtained
     by formatting ``self.linear_bic_seg`` with the threshold.

     :param cep: features forwarded to ``segmentation.bic_linear``
     :param diar: diarization to refine
     """
     thresholds = np.linspace(self.li_bic_p_start, self.li_bic_p_stop,
                              self.li_bic_p_num)
     for threshold in thresholds:
         refined_diar = segmentation.bic_linear(cep, diar, threshold,
                                                sr=False)
         out_path = self.linear_bic_seg.format(threshold)
         Diar.write_seg(out_path, refined_diar)