def apply(self, wav):
    """Compute divergence between embeddings of left and right sliding windows

    Parameters
    ----------
    wav : str
        Path to wav audio file.

    Returns
    -------
    predictions : SlidingWindowFeature
    """
    from pyannote.algorithms.stats.gaussian import Gaussian

    current_file = {'uri': wav, 'medium': {'wav': wav}}

    t, left, right = next(self.from_file(current_file))

    y = []
    for xL, xR in zip(left, right):
        gL = Gaussian(covariance_type='diag').fit(xL)
        gR = Gaussian(covariance_type='diag').fit(xR)
        y.append(gL.divergence(gR))
    y = np.array(y)

    window = SlidingWindow(duration=2 * self.duration,
                           step=self.step, start=0.)

    return SlidingWindowFeature(y, window)
def apply(self, current_file):
    """Compute BIC distance between left and right sliding windows

    Parameters
    ----------
    current_file : dict

    Returns
    -------
    predictions : SlidingWindowFeature
    """
    from pyannote.algorithms.stats.gaussian import Gaussian

    t, left, right = next(self.from_file(current_file))

    y = []
    for xL, xR in zip(left, right):
        gL = Gaussian(covariance_type=self.covariance_type).fit(xL)
        gR = Gaussian(covariance_type=self.covariance_type).fit(xR)
        y.append(gL.bic(gR, penalty_coef=0)[0])
    y = np.array(y)

    window = SlidingWindow(duration=2 * self.duration,
                           step=self.step, start=0.)

    return SlidingWindowFeature(y, window)
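
# ---------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the original code). It assumes
# `extractor` is an instance of the class defining the BIC-based `apply`
# method above, i.e. an object providing `from_file`, `duration` and `step`;
# the wav path is a placeholder.
# ---------------------------------------------------------------------------
def print_bic_curve(extractor, wav='/path/to/file.wav'):
    # `apply` returns a SlidingWindowFeature: `data` holds one BIC value per
    # position of the sliding window pair, and `sliding_window` maps each
    # value back to its time span.
    current_file = {'uri': wav, 'medium': {'wav': wav}}
    predictions = extractor.apply(current_file)
    for i, value in enumerate(predictions.data):
        window = predictions.sliding_window[i]
        print('{:.2f}s --> {:.2f}s : {:.3f}'.format(
            window.start, window.end, value))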
def get_model(self, cluster, annotation=None, feature=None, **kwargs):
    timeline = annotation.label_timeline(cluster)
    data = feature.crop(timeline)
    gaussian = Gaussian(covariance_type=self.covariance_type)
    gaussian.fit(data)
    return gaussian
def __call__(self, starting_point, features=None):

    current_gaussian = None
    current_label = None
    current_segment = None

    copy = starting_point.copy()

    for segment, track, label in starting_point.itertracks(label=True):

        data = features.crop(segment)
        gaussian = Gaussian(covariance_type=self.covariance_type)
        gaussian.fit(data)

        if current_gaussian is None:
            current_gaussian = gaussian
            current_segment = segment
            current_label = starting_point[segment, track]
            continue

        gap = (current_segment ^ segment).duration
        current_segment = segment

        # stop merging if gap is too large
        if gap > self.max_gap:
            current_gaussian = gaussian
            current_label = label
            continue

        delta_bic, merged_gaussian = current_gaussian.bic(
            gaussian, penalty_coef=self.penalty_coef)

        # stop merging if similarity is too small
        if delta_bic < 0.0:
            current_gaussian = gaussian
            current_label = label
            continue

        # merge in any other situation
        TEMPLATE = ("Merging {cluster1} and {cluster2} with "
                    "(BIC = {bic:g}).")
        message = TEMPLATE.format(
            cluster1=current_label, cluster2=label, bic=delta_bic)
        self.logger.debug(message)

        current_gaussian = merged_gaussian
        copy[segment, track] = current_label

    return copy
def compute_model(self, cluster, parent=None):
    timeline = parent.current_state.label_timeline(cluster)
    data = parent.features.crop(timeline)
    gaussian = Gaussian(covariance_type=self.covariance_type)
    gaussian.fit(data)
    return gaussian
def compare(dataset, medium_template, config_yml, output_dir):

    import itertools
    from pyannote.algorithms.stats.gaussian import Gaussian

    # load configuration file
    with open(config_yml, 'r') as fp:
        config = yaml.load(fp)

    X, y_true = generate_test(dataset, medium_template, config)
    n_sequences = X.shape[0]

    # one diagonal-covariance Gaussian per sequence
    gaussians = []
    for x in X:
        g = Gaussian(covariance_type='diag').fit(x)
        gaussians.append(g)

    # pairwise BIC distances
    bic = np.zeros((n_sequences, n_sequences), dtype=np.float64)
    for i, j in itertools.combinations(range(n_sequences), 2):
        bic[i, j], _ = gaussians[i].bic(gaussians[j], penalty_coef=0.)

    distances = squareform(bic, checks=False)

    # -- distances distributions
    plot_distributions(y_true, distances, output_dir + '/plot.bic',
                       xlim=(0, 20), ymax=0.5, nbins=100)

    # -- precision / recall curve
    auc = plot_precision_recall_curve(y_true, -distances,
                                      output_dir + '/plot.bic')
    msg = 'BIC | AUC = {auc:.2f}%'
    print(msg.format(auc=100 * auc))

    # -- det curve
    eer = plot_det_curve(y_true, -distances, output_dir + '/plot.bic')
    msg = 'BIC | EER = {eer:.2f}%'
    print(msg.format(eer=100 * eer))

    # pairwise Gaussian divergences
    divergence = np.zeros((n_sequences, n_sequences), dtype=np.float64)
    for i, j in itertools.combinations(range(n_sequences), 2):
        divergence[i, j] = gaussians[i].divergence(gaussians[j])

    distances = squareform(divergence, checks=False)

    # -- distances distributions
    plot_distributions(y_true, distances, output_dir + '/plot.divergence',
                       xlim=(0, 20), ymax=0.5, nbins=100)

    # -- precision / recall curve
    auc = plot_precision_recall_curve(y_true, -distances,
                                      output_dir + '/plot.divergence')
    msg = 'Divergence | AUC = {auc:.2f}%'
    print(msg.format(auc=100 * auc))

    # -- det curve
    eer = plot_det_curve(y_true, -distances, output_dir + '/plot.divergence')
    msg = 'Divergence | EER = {eer:.2f}%'
    print(msg.format(eer=100 * eer))
# randomly select (at most) 100 sequences from each speaker to ensure
# all speakers have the same importance in the evaluation
unique, y, counts = np.unique(y, return_inverse=True, return_counts=True)
n_speakers = len(unique)
indices = []
for speaker in range(n_speakers):
    i = np.random.choice(np.where(y == speaker)[0],
                         size=min(100, counts[speaker]),
                         replace=False)
    indices.append(i)
indices = np.hstack(indices)
X, y = X[indices], y[indices, np.newaxis]

# one Gaussian per segment
from pyannote.algorithms.stats.gaussian import Gaussian
full = [Gaussian(covariance_type='full').fit(x) for x in X]
diag = [Gaussian(covariance_type='diag').fit(x) for x in X]

# compute BIC and divergence between every pair of sequences
import itertools
n_sequences = len(X)
bic = np.zeros((n_sequences, n_sequences), dtype=np.float32)
div = np.zeros((n_sequences, n_sequences), dtype=np.float32)
for i, j in itertools.combinations(range(n_sequences), 2):

    gi, gj = full[i], full[j]
    bic[i, j] = gi.bic(gj, penalty_coef=0)[0]
    bic[j, i] = bic[i, j]

    gi, gj = diag[i], diag[j]
    div[i, j] = gi.divergence(gj)
    div[j, i] = div[i, j]
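
# ---------------------------------------------------------------------------
# Minimal self-contained sketch of the pairwise comparison above (assuming
# pyannote.algorithms is installed). The random matrices below are synthetic
# placeholders for real per-segment feature matrices.
# ---------------------------------------------------------------------------
import numpy as np
from pyannote.algorithms.stats.gaussian import Gaussian

# two (n_frames, n_dims) feature matrices
x1 = np.random.randn(200, 12)
x2 = np.random.randn(150, 12) + 1.0

# delta BIC between the two segments: first returned value is the score,
# second is the Gaussian fitted on the merged data
g1 = Gaussian(covariance_type='full').fit(x1)
g2 = Gaussian(covariance_type='full').fit(x2)
delta_bic, merged = g1.bic(g2, penalty_coef=0)

# symmetric divergence, computed on diagonal-covariance Gaussians as above
d1 = Gaussian(covariance_type='diag').fit(x1)
d2 = Gaussian(covariance_type='diag').fit(x2)
divergence = d1.divergence(d2)

print('delta BIC = {:g} | divergence = {:g}'.format(delta_bic, divergence))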