def __init__(
    self,
    validate_dir='/home/src/model_files/ZALODATASET.SpeakerVerification.BenProtocol.train/validate_equal_error_rate/ZALODATASET.SpeakerVerification.BenProtocol.development/',
    epoch=31,
    device="cpu",
):
    """Load the pretrained model and start with an empty embedding cache.

    The three parameters used to be hard-coded; their defaults reproduce
    the previous behavior exactly, so calling with no argument is unchanged.

    Parameters
    ----------
    validate_dir : str, optional
        Forwarded to `Pretrained` as `validate_dir`.
    epoch : int, optional
        Forwarded to `Pretrained` as `epoch`. Defaults to 31.
    device : str, optional
        Forwarded to `Pretrained` as `device`. Defaults to "cpu".
    """
    self.engine = Pretrained(validate_dir=validate_dir, epoch=epoch, device=device)
    # starts empty; presumably maps a file name to its embedding -- confirm
    # against the methods that fill it
    self.filename2embedding = {}
def validate_epoch(
    self,
    epoch,
    validation_data,
    device=None,
    batch_size=32,
    diarization=False,
    n_jobs=1,
    duration=None,
    step=0.25,
    **kwargs
):
    """Validate one epoch by tuning the pipeline "alpha" threshold.

    Scores are computed for every validation file and stored under its
    "scores" key. A bounded scalar search (at most 10 iterations) over
    [0, 1] then looks for the "alpha" value minimizing
    `1 - abs(pipeline metric)`.

    Returns
    -------
    dict
        "metric"   : `self.validation_criterion(None, diarization=diarization)`
        "minimize" : False
        "value"    : one minus the minimum found by the search
        "pipeline" : result of instantiating the pipeline with the best
                     threshold
    """

    # compute (and store) SCD scores
    scorer = Pretrained(
        validate_dir=self.validate_dir_,
        epoch=epoch,
        duration=duration,
        step=step,
        batch_size=batch_size,
        device=device,
    )
    for item in validation_data:
        item["scores"] = scorer(item)

    # pipeline reading the scores stored just above
    pipeline = self.Pipeline(scores="@scores", fscore=True, diarization=diarization)

    def objective(alpha):
        """Return `1 - abs(metric)` of the pipeline run with this threshold."""
        pipeline.instantiate({"alpha": alpha, "min_duration": 0.100})
        metric = pipeline.get_metric(parallel=True)
        evaluate = partial(validate_helper_func, pipeline=pipeline, metric=metric)
        if n_jobs > 1:
            self.pool_.map(evaluate, validation_data)
        else:
            for item in validation_data:
                evaluate(item)
        return 1.0 - abs(metric)

    search = scipy.optimize.minimize_scalar(
        objective, bounds=(0.0, 1.0), method="bounded", options={"maxiter": 10}
    )
    best_params = {"alpha": search.x.item(), "min_duration": 0.100}

    return {
        "metric": self.validation_criterion(None, diarization=diarization),
        "minimize": False,
        "value": float(1.0 - search.fun),
        "pipeline": pipeline.instantiate(best_params),
    }
def __init__(self, weights_path: Path = None, step: float = 0.0333):
    """Wrap the pretrained model found at `weights_path`.

    Parameters
    ----------
    weights_path : Path
        Converted with `Path(...)` and passed to `Pretrained` as its first
        positional argument. The default (None) is not a usable value: it
        triggers the TypeError below.
    step : float, optional
        Forwarded to `Pretrained` as `step`. Defaults to 0.0333.

    Raises
    ------
    TypeError
        If `weights_path` cannot be converted to a `Path`.
    """
    try:
        weights_path = Path(weights_path)
    except TypeError as e:
        msg = (
            f'"weights_path" must be str, bytes, or os.PathLike object, not {type(weights_path).__name__}.'
        )
        # chain explicitly so the original conversion error stays attached
        # as the direct cause of the friendlier message
        raise TypeError(msg) from e
    self._model = Pretrained(weights_path, step=step)
def validate_epoch(self, epoch, validation_data, device=None, batch_size=32,
                   n_jobs=1, duration=None, step=0.25, **kwargs):
    """Validate one epoch with file-level classification accuracy.

    For each validation file, the model output is reduced with
    `argmax(axis=1)` and the most frequent resulting index is kept as the
    file prediction. It is compared with the index, in the model's
    `classes`, of the value stored in the file under the key
    `self.task_.domain`.

    Returns
    -------
    dict
        "metric"   : 'accuracy'
        "minimize" : False
        "value"    : fraction of files whose prediction equals the reference
    """
    model = Pretrained(validate_dir=self.validate_dir_,
                       epoch=epoch,
                       duration=duration,
                       step=step,
                       batch_size=batch_size,
                       device=device)

    label_key = self.task_.domain
    known_labels = model.classes

    expected, predicted = [], []
    for item in validation_data:
        # one index per row of the model output, then majority vote
        row_labels = model(item).data.argmax(axis=1)
        majority = Counter(row_labels).most_common(1)[0][0]
        predicted.append(majority)
        expected.append(known_labels.index(item[label_key]))

    hits = np.array(expected) == np.array(predicted)
    accuracy = np.mean(hits)

    return {
        'metric': 'accuracy',
        'minimize': False,
        'value': float(accuracy)
    }
continue ratio = result.loc[True] / result.loc[False] minimise_metric = abs((ratio) - 1) rows.append({ "metric": minimise_metric, "threshold": i, "ratio": ratio }) _ = pd.DataFrame(rows) return _.sort_values("metric").iloc[0] expt_name = "embeddings_voxceleb_19epochfinetuned" emb = Pretrained( validate_dir= '/media/ben/datadrive/Software/pyannote-audio/data/ami/voxceleb_finetuneexp2_loose/train/ZALODATASET.SpeakerVerification.BenProtocol.train/validate_equal_error_rate/ZALODATASET.SpeakerVerification.BenProtocol.development/', epoch=19, device="cuda") os.environ[ "PYANNOTE_DATABASE_CONFIG"] = "/media/ben/datadrive/Software/pyannote-audio/data/ami/" # speaker embedding model trained on AMI training set # emb = torch.hub.load('pyannote/pyannote-audio', 'emb_voxceleb') expt_root = "/media/ben/datadrive/Zalo/voice-verification/" dataset_path = os.path.abspath( os.path.join(expt_root, "Train-Test-Data/public-test.csv")) df_test_sub = pd.read_csv(dataset_path) filename2embedding = {} with open(f"{expt_name}embedding_public.pickle", "rb") as input_file: filename2embedding = pickle.load(input_file)
from tqdm import tqdm import torch from pathlib import Path import numpy as np from tqdm import tqdm import pandas as pd from pyannote.audio.features import Pretrained from pyannote.core.utils.distance import l2_normalize from pyannote.core.utils.distance import cdist import os threshold = 0.792 # from val set expt_name = "embeddings_voxceleb_20epochfinetuned" emb = Pretrained( validate_dir= '/media/ben/datadrive/Software/pyannote-audio/data/ami/tmp2/train/ZALODATASET.SpeakerVerification.MixHeadset.train/validate_equal_error_rate/ZALODATASET.SpeakerVerification.MixHeadset.val/', epoch=20) os.environ[ "PYANNOTE_DATABASE_CONFIG"] = "/media/ben/datadrive/Software/pyannote-audio/data/ami/" # speaker embedding model trained on AMI training set # emb = torch.hub.load('pyannote/pyannote-audio', 'emb_voxceleb') expt_root = "/media/ben/datadrive/Zalo/voice-verification/" dataset_path = os.path.abspath( os.path.join(expt_root, "Train-Test-Data/public-test.csv")) df_test_sub = pd.read_csv(dataset_path) with tqdm(total=len(df_test_sub)) as pbar: for i, row in df_test_sub.iterrows(): audio1_embedding = np.mean(emb({ "uri": row["audio_1"],
def _validate_epoch_diarization(
    self,
    epoch,
    validation_data,
    protocol=None,
    subset: Subset = "development",
    device: Optional[torch.device] = None,
    batch_size: int = 32,
    n_jobs: int = 1,
    duration: float = None,
    step: float = 0.25,
    metric: str = None,
    **kwargs,
):
    """Validate one epoch by clustering speech-turn embeddings.

    Each reference speech turn of each file in `subset` is represented by
    the average of the embeddings cropped on that turn. A complete
    dendrogram is built per file, then a bounded scalar search (at most 10
    iterations, bounds [0, 1]) looks for the distance threshold at which
    cutting the dendrograms minimizes
    `1 - abs(DiarizationPurityCoverageFMeasure)`.

    Side effect: "audio" and "duration" entries are added to
    `self.preprocessors_` when missing.

    Returns
    -------
    dict
        "metric"   : "diarization_fscore"
        "minimize" : False
        "value"    : one minus the minimum found by the search

    Raises
    ------
    ValueError
        If no embedding can be cropped for a speech turn, even in "loose"
        mode.
    """

    # initialize embedding extraction
    extractor = Pretrained(
        validate_dir=self.validate_dir_,
        epoch=epoch,
        duration=duration,
        step=step,
        batch_size=batch_size,
        device=device,
    )

    preprocessors = self.preprocessors_
    if "audio" not in preprocessors:
        preprocessors["audio"] = FileFinder()
    if "duration" not in preprocessors:
        preprocessors["duration"] = get_audio_duration
    _protocol = get_protocol(protocol, preprocessors=preprocessors)

    dendrograms, turns_by_uri = {}, {}
    # NOTE(review): the distance range is tracked but the search below uses
    # fixed (0, 1) bounds -- confirm whether (min_d, max_d) was intended
    min_d, max_d = np.inf, -np.inf

    for current_file in getattr(_protocol, subset)():
        uri = get_unique_identifier(current_file)
        # result is not used in this loop; the call is kept as in the original
        uem = get_annotated(current_file)
        reference = current_file["annotation"]

        turn_means, turns = [], []
        embedding = extractor(current_file)

        for turn, _ in reference.itertracks():
            # extract embedding for current speech turn,
            # falling back to "loose" cropping when "center" yields nothing
            cropped = embedding.crop(turn, mode="center")
            if len(cropped) < 1:
                cropped = embedding.crop(turn, mode="loose")
                if len(cropped) < 1:
                    msg = f"No embedding for {turn} in {uri:s}."
                    raise ValueError(msg)

            # each speech turn is represented by its average embedding
            turn_means.append(np.mean(cropped, axis=0))
            turns.append(turn)

        turn_means = np.array(turn_means)

        # apply hierarchical agglomerative clustering
        # all the way up to just one cluster (ie complete dendrogram)
        distances = pdist(turn_means, metric=metric)
        min_d = min(np.min(distances), min_d)
        max_d = max(np.max(distances), max_d)

        dendrograms[uri] = linkage(turn_means, method="pool", metric=metric)
        turns_by_uri[uri] = np.array(turns)

    def objective(threshold):
        """Return `1 - abs(f-measure)` when cutting dendrograms at `threshold`."""
        fmeasure = DiarizationPurityCoverageFMeasure(weighted=False)

        for current_file in getattr(_protocol, subset)():
            uri = get_unique_identifier(current_file)
            uem = get_annotated(current_file)
            reference = current_file["annotation"]

            labels = fcluster(dendrograms[uri], threshold, criterion="distance")

            hypothesis = Annotation(uri=uri)
            for (start_time, end_time), label in zip(turns_by_uri[uri], labels):
                hypothesis[Segment(start_time, end_time)] = label

            fmeasure(reference, hypothesis, uem=uem)

        return 1.0 - abs(fmeasure)

    search = scipy.optimize.minimize_scalar(
        objective, bounds=(0.0, 1.0), method="bounded", options={"maxiter": 10}
    )

    # best threshold is extracted as in the original but not returned
    threshold = search.x.item()

    return {
        "metric": "diarization_fscore",
        "minimize": False,
        "value": float(1.0 - search.fun),
    }
def _validate_epoch_verification(
    self,
    epoch,
    validation_data,
    protocol=None,
    subset: Subset = "development",
    device: Optional[torch.device] = None,
    batch_size: int = 32,
    n_jobs: int = 1,
    duration: float = None,
    step: float = 0.25,
    metric: str = None,
    **kwargs,
):
    """Validate one epoch with the equal error rate over verification trials.

    Every trial yielded by the protocol's `{subset}_trial` method is scored
    with the distance between the embeddings of its two files. Embeddings
    are computed once per distinct `self.get_hash(file)` value and reused.

    Side effect: "audio" and "duration" entries are added to
    `self.preprocessors_` when missing.

    Returns
    -------
    dict
        "metric"   : "equal_error_rate"
        "minimize" : True
        "value"    : fourth value returned by `det_curve`
    """

    # initialize embedding extraction
    extractor = Pretrained(
        validate_dir=self.validate_dir_,
        epoch=epoch,
        duration=duration,
        step=step,
        batch_size=batch_size,
        device=device,
    )

    preprocessors = self.preprocessors_
    if "audio" not in preprocessors:
        preprocessors["audio"] = FileFinder()
    if "duration" not in preprocessors:
        preprocessors["duration"] = get_audio_duration
    _protocol = get_protocol(protocol, preprocessors=preprocessors)

    embeddings = {}

    def embed(file):
        """Return the embedding of `file`, computing it on first use only."""
        key = self.get_hash(file)
        if key not in embeddings:
            embeddings[key] = self.get_embedding(file, extractor)
        return embeddings[key]

    references, distances = [], []
    trials = getattr(_protocol, f"{subset}_trial")
    for trial in trials():
        first = embed(trial["file1"])
        second = embed(trial["file2"])

        # compare embeddings
        distances.append(cdist(first, second, metric=metric)[0, 0])
        references.append(trial["reference"])

    _, _, _, eer = det_curve(np.array(references), np.array(distances), distances=True)

    return {
        "metric": "equal_error_rate",
        "minimize": True,
        "value": float(eer)
    }
def validate_epoch(self, epoch, validation_data, device=None, batch_size=32,
                   n_jobs=1, duration=None, step=0.25, **kwargs):
    """Validate one epoch by tuning a shared onset/offset threshold.

    Scores are computed for every validation file and stored under its
    'scores' key. A bounded scalar search (at most 10 iterations) over
    [0, 1] then looks for the threshold, used for both 'onset' and
    'offset', minimizing `1 - abs(pipeline metric)`.

    Returns
    -------
    dict
        'metric'   : `self.validation_criterion(None)`
        'minimize' : False
        'value'    : one minus the minimum found by the search
        'pipeline' : result of instantiating the pipeline with the best
                     threshold
    """

    def pipeline_params(value):
        """Build pipeline parameters using `value` as onset and offset."""
        return {
            'onset': value,
            'offset': value,
            'min_duration_on': 0.100,
            'min_duration_off': 0.100,
            'pad_onset': 0.,
            'pad_offset': 0.
        }

    # compute (and store) SAD scores
    scorer = Pretrained(validate_dir=self.validate_dir_,
                        epoch=epoch,
                        duration=duration,
                        step=step,
                        batch_size=batch_size,
                        device=device)
    for item in validation_data:
        item['scores'] = scorer(item)

    # pipeline reading the scores stored just above
    pipeline = self.Pipeline(scores="@scores", fscore=True)

    def objective(value):
        """Return `1 - abs(metric)` of the pipeline run with this threshold."""
        pipeline.instantiate(pipeline_params(value))
        metric = pipeline.get_metric(parallel=True)
        evaluate = partial(validate_helper_func, pipeline=pipeline, metric=metric)
        if n_jobs > 1:
            self.pool_.map(evaluate, validation_data)
        else:
            for item in validation_data:
                evaluate(item)
        return 1. - abs(metric)

    search = scipy.optimize.minimize_scalar(objective,
                                            bounds=(0., 1.),
                                            method='bounded',
                                            options={'maxiter': 10})
    best = search.x.item()

    return {
        'metric': self.validation_criterion(None),
        'minimize': False,
        'value': float(1. - search.fun),
        'pipeline': pipeline.instantiate(pipeline_params(best))
    }
def apply_pretrained(validate_dir: Path,
                     protocol_name: str,
                     subset: Optional[str] = "test",
                     duration: Optional[float] = None,
                     step: float = 0.25,
                     device: Optional[torch.device] = None,
                     batch_size: int = 32,
                     pretrained: Optional[str] = None,
                     Pipeline: type = None,
                     **kwargs):
    """Apply a pre-trained model to a protocol subset and dump its output

    The model output of every file is dumped through `Precomputed`. When
    `Pipeline` is given and the model exposes `pipeline_params_`, the
    pipeline is then applied to every file, its output is written to
    `{protocol_name}.{subset}.rttm`, and, when a metric is available and
    every file has an 'annotation', the metric is written to
    `{protocol_name}.{subset}.eval`.

    Parameters
    ----------
    validate_dir : Path
        Validation directory. Also the root of the output directory.
    protocol_name : `str`
        Name passed to `get_protocol`.
    subset : 'train' | 'development' | 'test', optional
        Defaults to 'test'.
    duration : `float`, optional
    step : `float`, optional
    device : `torch.device`, optional
    batch_size : `int`, optional
        Forwarded to the model wrapper.
    pretrained : `str`, optional
        When None, the model is loaded from `validate_dir` and output goes
        to `validate_dir / 'apply' / <epoch on 4 digits>`. Otherwise it is
        wrapped with `Wrapper`; output goes to `validate_dir / pretrained`
        when it is listed by `torch.hub.list('pyannote/pyannote-audio')`,
        and to `validate_dir` otherwise.
    Pipeline : `type`, optional
        Pipeline class. When None, only the model output is dumped.
    """

    if pretrained is None:
        pretrained = Pretrained(validate_dir=validate_dir,
                                duration=duration,
                                step=step,
                                batch_size=batch_size,
                                device=device)
        output_dir = validate_dir / 'apply' / f'{pretrained.epoch_:04d}'
    else:
        hub_models = torch.hub.list('pyannote/pyannote-audio')
        output_dir = validate_dir / pretrained if pretrained in hub_models else validate_dir
        pretrained = Wrapper(pretrained,
                             duration=duration,
                             step=step,
                             batch_size=batch_size,
                             device=device)

    # optional metadata: only forwarded when the model exposes them
    params = {}
    for attribute in ('classes', 'dimension'):
        try:
            params[attribute] = getattr(pretrained, attribute)
        except AttributeError:
            pass

    # create metadata file at root that contains
    # sliding window and dimension information
    precomputed = Precomputed(root_dir=output_dir,
                              sliding_window=pretrained.sliding_window,
                              **params)

    # file generator
    protocol = get_protocol(protocol_name,
                            progress=True,
                            preprocessors=pretrained.preprocessors_)
    iter_files = getattr(protocol, subset)

    for current_file in iter_files():
        precomputed.dump(current_file, pretrained(current_file))

    # do not proceed with the full pipeline
    # when there is no such thing for current task
    if Pipeline is None:
        return

    # do not proceed with the full pipeline when its parameters cannot be
    # loaded. this might happen when applying a model that has not been
    # validated yet
    try:
        pipeline_params = pretrained.pipeline_params_
    except AttributeError:
        return

    # instantiate pipeline
    pipeline = Pipeline(scores=output_dir)
    pipeline.instantiate(pipeline_params)

    # load pipeline metric (when available)
    try:
        metric = pipeline.get_metric()
    except NotImplementedError:
        metric = None

    # apply pipeline and dump output to RTTM files
    output_rttm = output_dir / f'{protocol_name}.{subset}.rttm'
    with open(output_rttm, 'w') as fp:
        for current_file in iter_files():
            hypothesis = pipeline(current_file)
            pipeline.write_rttm(fp, hypothesis)

            # a single file without reference disables evaluation
            # for all remaining files
            if 'annotation' not in current_file:
                metric = None

            # compute evaluation metric (when available)
            if metric is not None:
                metric(current_file['annotation'],
                       hypothesis,
                       uem=get_annotated(current_file))

    # print pipeline metric (when available)
    if metric is None:
        return

    output_eval = output_dir / f'{protocol_name}.{subset}.eval'
    with open(output_eval, 'w') as fp:
        fp.write(str(metric))
def __init__(self, wrappable: Wrappable, **params):
    """Resolve `wrappable` into a scorer stored as `self.scorer_`.

    Parameters
    ----------
    wrappable : Wrappable
        One of:
        - a one-item dict `{wrappable: custom_params}`, whose value is
          merged into `params`;
        - a `FeatureExtraction`, `RawAudio`, `Pretrained` or `Precomputed`
          instance, kept unchanged;
        - the path to a directory, tried as `Precomputed` root directory
          first and as `Pretrained` validation directory next;
        - the path to a file, treated as a model checkpoint whose stem is
          the epoch number;
        - a string starting with "@" (e.g. "@key"), meaning the "key" key
          of protocol files should be read;
        - any other string, loaded from `torch.hub`
          ("pyannote/pyannote-audio").
    **params
        Forwarded to `Pretrained` or `torch.hub.load`.

    Raises
    ------
    ValueError
        If no scorer could be built from `wrappable`.
    """
    super().__init__()

    # local imports, as in the original (presumably to avoid an import
    # cycle -- TODO confirm)
    from pyannote.audio.features import Pretrained
    from pyannote.audio.features import Precomputed
    from pyannote.audio.features import FeatureExtraction
    from pyannote.audio.features import RawAudio

    scorer = None
    msg = ""

    # corner case: {wrappable: custom_params} dictionary
    if isinstance(wrappable, dict):
        wrappable, custom_params = dict(wrappable).popitem()
        params.update(**custom_params)

    # If `wrappable` already complies with the `FeatureExtraction` API, it
    # is kept unchanged. This includes instances of any `FeatureExtraction`
    # subclass, `RawAudio` instances, `Precomputed` instances, and
    # `Pretrained` instances.
    if isinstance(wrappable, (FeatureExtraction, RawAudio, Pretrained, Precomputed)):
        scorer = wrappable

    elif Path(wrappable).is_dir():
        directory = Path(wrappable)

        # If `wrappable` is a `Path` to a directory containing precomputed
        # features or scores, wrap the corresponding `Precomputed` instance
        try:
            scorer = Precomputed(root_dir=directory)
        except Exception:
            scorer = None

        # If `wrappable` is a `Path` to a validation directory,
        # wrap the corresponding `Pretrained` instance
        if scorer is None:
            try:
                scorer = Pretrained(validate_dir=directory, **params)
            except Exception:
                scorer = None

        if scorer is None:
            msg = (f'"{wrappable}" directory does not seem to be the path '
                   f"to precomputed features nor the path to a model "
                   f"validation step.")

    # If `wrappable` is a `Path` to a pretrained model checkpoint,
    # wrap the corresponding `Pretrained` instance
    elif Path(wrappable).is_file():
        checkpoint = Path(wrappable)

        try:
            # `Pretrained` is given a placeholder validation directory next
            # to the checkpoint's parent directory
            validate_dir = checkpoint.parents[1] / "validate" / "fake"
            epoch = int(checkpoint.stem)
            scorer = Pretrained(validate_dir=validate_dir, epoch=epoch, **params)
        except Exception:
            # fixed: this branch handles a file, not a directory
            msg = (f'"{wrappable}" file does not seem to be the path '
                   f"to a pretrained model checkpoint.")
            scorer = None

    elif isinstance(wrappable, Text):

        # If `wrappable` is a `Text` starting with '@' such as '@key',
        # it means that one should read the "key" key of protocol files
        if wrappable.startswith("@"):
            key = wrappable[1:]
            scorer = partial(_use_existing_key, key)
            # scorer = lambda current_file: current_file[key]

        # If `wrappable` is a `Text` containing the name of an existing
        # `torch.hub` model, wrap the corresponding `Pretrained`.
        else:
            try:
                import torch

                scorer = torch.hub.load("pyannote/pyannote-audio", wrappable, **params)
                if not isinstance(scorer, Pretrained):
                    msg = (
                        f'"{wrappable}" exists on torch.hub but does not '
                        f"return a `Pretrained` model instance.")
                    scorer = None
            except Exception as e:
                msg = (f"Could not load {wrappable} model from torch.hub. "
                       f"The following exception was raised:\n{e}")
                scorer = None

    # warn the user that something went wrong
    if scorer is None:
        if not msg:
            # no branch above matched (e.g. a `Path` pointing to nothing):
            # the original raised ValueError with an empty message here
            msg = (f'"{wrappable}" cannot be wrapped: it is not a feature '
                   f"extraction instance, an existing directory or file, "
                   f'an "@key" string, or the name of a torch.hub model.')
        raise ValueError(msg)

    self.scorer_ = scorer
def _validate_epoch_verification(self,
                                 epoch,
                                 validation_data,
                                 protocol=None,
                                 subset='development',
                                 device: Optional[torch.device] = None,
                                 batch_size: int = 32,
                                 n_jobs: int = 1,
                                 duration: float = None,
                                 step: float = 0.25,
                                 metric: str = None,
                                 **kwargs):
    """Validate one epoch with the equal error rate over verification trials.

    Every trial yielded by the protocol's `<subset>_trial` method is scored
    with the distance between the embeddings of its two files. Embeddings
    are computed once per distinct `self.get_hash(file)` value and reused.

    Returns
    -------
    dict
        'metric'   : 'equal_error_rate'
        'minimize' : True
        'value'    : fourth value returned by `det_curve`
    """

    # initialize embedding extraction
    extractor = Pretrained(validate_dir=self.validate_dir_,
                           epoch=epoch,
                           duration=duration,
                           step=step,
                           batch_size=batch_size,
                           device=device)

    _protocol = get_protocol(protocol,
                             progress=False,
                             preprocessors=self.preprocessors_)

    seen = {}
    labels, scores = [], []

    trial_method = getattr(_protocol, '{0}_trial'.format(subset))
    for trial in trial_method():

        # embeddings of both sides of the trial, computed on first use only
        pair = []
        for side in ('file1', 'file2'):
            current = trial[side]
            digest = self.get_hash(current)
            if digest not in seen:
                seen[digest] = self.get_embedding(current, extractor)
            pair.append(seen[digest])

        # compare embeddings
        scores.append(cdist(pair[0], pair[1], metric=metric)[0, 0])
        labels.append(trial['reference'])

    _, _, _, eer = det_curve(np.array(labels), np.array(scores), distances=True)

    return {
        'metric': 'equal_error_rate',
        'minimize': True,
        'value': float(eer)
    }