Example #1
    def __init__(self):
        self.engine = Pretrained(
            validate_dir='/home/src/model_files/ZALODATASET.SpeakerVerification.BenProtocol.train/validate_equal_error_rate/ZALODATASET.SpeakerVerification.BenProtocol.development/',
            epoch=31,
            device="cpu")

        self.filename2embedding = {}

    def validate_epoch(
        self,
        epoch,
        validation_data,
        device=None,
        batch_size=32,
        diarization=False,
        n_jobs=1,
        duration=None,
        step=0.25,
        **kwargs
    ):

        # compute (and store) SCD scores
        pretrained = Pretrained(
            validate_dir=self.validate_dir_,
            epoch=epoch,
            duration=duration,
            step=step,
            batch_size=batch_size,
            device=device,
        )

        for current_file in validation_data:
            current_file["scores"] = pretrained(current_file)

        # pipeline
        pipeline = self.Pipeline(scores="@scores", fscore=True, diarization=diarization)

        def fun(threshold):
            # `alpha` is the peak-detection threshold of the SCD pipeline and
            # `min_duration` the minimum gap between two change points
            pipeline.instantiate({"alpha": threshold, "min_duration": 0.100})
            metric = pipeline.get_metric(parallel=True)
            validate = partial(validate_helper_func, pipeline=pipeline, metric=metric)
            if n_jobs > 1:
                _ = self.pool_.map(validate, validation_data)
            else:
                for file in validation_data:
                    _ = validate(file)

            return 1.0 - abs(metric)

        res = scipy.optimize.minimize_scalar(
            fun, bounds=(0.0, 1.0), method="bounded", options={"maxiter": 10}
        )

        threshold = res.x.item()

        return {
            "metric": self.validation_criterion(None, diarization=diarization),
            "minimize": False,
            "value": float(1.0 - res.fun),
            "pipeline": pipeline.instantiate(
                {"alpha": threshold, "min_duration": 0.100}
            ),
        }
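
# `validate_helper_func` is used above but not defined in the snippet. A
# minimal sketch, assuming it scores a single file with the instantiated
# pipeline and folds the result into the shared pyannote.metrics object:
def validate_helper_func(current_file, pipeline=None, metric=None):
    reference = current_file["annotation"]         # ground-truth annotation
    uem = get_annotated(current_file)              # region to evaluate
    hypothesis = pipeline(current_file)            # run pipeline on one file
    return metric(reference, hypothesis, uem=uem)  # accumulate into metric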
Example #3
    def __init__(self, weights_path: Path = None, step: float = 0.0333):

        try:
            weights_path = Path(weights_path)
        except TypeError as e:
            msg = (
                f'"weights_path" must be str, bytes, or os.PathLike object, '
                f'not {type(weights_path).__name__}.'
            )
            raise TypeError(msg) from e

        self._model = Pretrained(weights_path, step=step)
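
    # Usage sketch for the constructor above (hypothetical class name; the
    # weights path must be str/bytes/os.PathLike or the TypeError is raised):
    #
    #   model = SomeWrapper(weights_path="/path/to/weights", step=0.0333)
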
    def validate_epoch(self,
                       epoch,
                       validation_data,
                       device=None,
                       batch_size=32,
                       n_jobs=1,
                       duration=None,
                       step=0.25,
                       **kwargs):

        pretrained = Pretrained(validate_dir=self.validate_dir_,
                                epoch=epoch,
                                duration=duration,
                                step=step,
                                batch_size=batch_size,
                                device=device)

        domain = self.task_.domain
        domains = pretrained.classes

        y_true_file, y_pred_file = [], []

        for current_file in validation_data:

            y_pred = pretrained(current_file).data.argmax(axis=1)
            y_pred_file.append(Counter(y_pred).most_common(1)[0][0])

            y_true = domains.index(current_file[domain])
            y_true_file.append(y_true)

        accuracy = np.mean(np.array(y_true_file) == np.array(y_pred_file))

        return {
            'metric': 'accuracy',
            'minimize': False,
            'value': float(accuracy)
        }
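
    # A tiny worked example of the per-file majority vote used above:
    #
    #   >>> from collections import Counter
    #   >>> Counter([2, 2, 0, 2, 1]).most_common(1)
    #   [(2, 3)]
    #
    # i.e. the file-level prediction would be class 2.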

# NOTE: the opening lines of this snippet were lost at extraction; the
# enclosing function below is a minimal reconstruction (assumed names:
# `thresholds`, and `results` mapping each threshold to a pandas value-count
# of boolean predictions) so that the surviving tail is runnable.
def best_threshold(thresholds, results):
    rows = []
    for i in thresholds:
        result = results[i]
        # skip thresholds where every prediction is True or every one is False
        if True not in result.index or False not in result.index:
            continue
        # how balanced are positive vs negative decisions at this threshold?
        ratio = result.loc[True] / result.loc[False]
        minimise_metric = abs(ratio - 1)
        rows.append({
            "metric": minimise_metric,
            "threshold": i,
            "ratio": ratio
        })
    df = pd.DataFrame(rows)
    # best threshold: the one whose True/False ratio is closest to 1
    return df.sort_values("metric").iloc[0]


expt_name = "embeddings_voxceleb_19epochfinetuned"
emb = Pretrained(
    validate_dir='/media/ben/datadrive/Software/pyannote-audio/data/ami/voxceleb_finetuneexp2_loose/train/ZALODATASET.SpeakerVerification.BenProtocol.train/validate_equal_error_rate/ZALODATASET.SpeakerVerification.BenProtocol.development/',
    epoch=19,
    device="cuda")

os.environ["PYANNOTE_DATABASE_CONFIG"] = "/media/ben/datadrive/Software/pyannote-audio/data/ami/"
# speaker embedding model trained on AMI training set
# emb = torch.hub.load('pyannote/pyannote-audio', 'emb_voxceleb')
expt_root = "/media/ben/datadrive/Zalo/voice-verification/"
dataset_path = os.path.abspath(
    os.path.join(expt_root, "Train-Test-Data/public-test.csv"))
df_test_sub = pd.read_csv(dataset_path)

filename2embedding = {}
with open(f"{expt_name}embedding_public.pickle", "rb") as input_file:
    filename2embedding = pickle.load(input_file)
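
# For reference, a minimal sketch of the producer side of this cache
# (hypothetical helper: assumes `numpy as np` and `pickle` are imported as in
# the other snippets, and that an "audio_2" column exists next to "audio_1"):
def build_embedding_cache(df, emb, path):
    cache = {}
    for name in pd.concat([df["audio_1"], df["audio_2"]]).unique():
        # average sliding-window embeddings into a single vector per file
        cache[name] = np.mean(emb({"uri": name}).data, axis=0)
    with open(path, "wb") as f:
        pickle.dump(cache, f)
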
Example #6
import os
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from tqdm import tqdm

from pyannote.audio.features import Pretrained
from pyannote.core.utils.distance import cdist, l2_normalize

threshold = 0.792  # from val set
expt_name = "embeddings_voxceleb_20epochfinetuned"
emb = Pretrained(
    validate_dir='/media/ben/datadrive/Software/pyannote-audio/data/ami/tmp2/train/ZALODATASET.SpeakerVerification.MixHeadset.train/validate_equal_error_rate/ZALODATASET.SpeakerVerification.MixHeadset.val/',
    epoch=20)

os.environ["PYANNOTE_DATABASE_CONFIG"] = "/media/ben/datadrive/Software/pyannote-audio/data/ami/"
# speaker embedding model trained on AMI training set
# emb = torch.hub.load('pyannote/pyannote-audio', 'emb_voxceleb')
expt_root = "/media/ben/datadrive/Zalo/voice-verification/"
dataset_path = os.path.abspath(
    os.path.join(expt_root, "Train-Test-Data/public-test.csv"))
df_test_sub = pd.read_csv(dataset_path)
with tqdm(total=len(df_test_sub)) as pbar:
    for i, row in df_test_sub.iterrows():
        # NOTE: the snippet is truncated here at extraction; the call below is
        # closed minimally so that it parses (the original likely passed more
        # than the "uri" key before averaging over the sliding window)
        audio1_embedding = np.mean(emb({"uri": row["audio_1"]}).data, axis=0)

    def _validate_epoch_diarization(
        self,
        epoch,
        validation_data,
        protocol=None,
        subset: Subset = "development",
        device: Optional[torch.device] = None,
        batch_size: int = 32,
        n_jobs: int = 1,
        duration: float = None,
        step: float = 0.25,
        metric: str = None,
        **kwargs,
    ):

        # initialize embedding extraction
        pretrained = Pretrained(
            validate_dir=self.validate_dir_,
            epoch=epoch,
            duration=duration,
            step=step,
            batch_size=batch_size,
            device=device,
        )

        preprocessors = self.preprocessors_
        if "audio" not in preprocessors:
            preprocessors["audio"] = FileFinder()
        if "duration" not in preprocessors:
            preprocessors["duration"] = get_audio_duration
        _protocol = get_protocol(protocol, preprocessors=preprocessors)

        Z, t = dict(), dict()
        min_d, max_d = np.inf, -np.inf

        for current_file in getattr(_protocol, subset)():

            uri = get_unique_identifier(current_file)
            uem = get_annotated(current_file)
            reference = current_file["annotation"]

            X_, t_ = [], []
            embedding = pretrained(current_file)
            for turn, _ in reference.itertracks():

                # extract embedding for current speech turn
                x_ = embedding.crop(turn, mode="center")
                if len(x_) < 1:
                    x_ = embedding.crop(turn, mode="loose")
                if len(x_) < 1:
                    msg = f"No embedding for {turn} in {uri:s}."
                    raise ValueError(msg)

                # each speech turn is represented by its average embedding
                X_.append(np.mean(x_, axis=0))
                t_.append(turn)

            X_ = np.array(X_)

            # keep track of the overall range of inter-turn distances
            # across files
            D = pdist(X_, metric=metric)
            min_d = min(np.min(D), min_d)
            max_d = max(np.max(D), max_d)

            # apply hierarchical agglomerative clustering
            # all the way up to just one cluster (ie complete dendrogram)
            Z[uri] = linkage(X_, method="pool", metric=metric)
            t[uri] = np.array(t_)

        def fun(threshold):

            _metric = DiarizationPurityCoverageFMeasure(weighted=False)

            for current_file in getattr(_protocol, subset)():

                uri = get_unique_identifier(current_file)
                uem = get_annotated(current_file)
                reference = current_file["annotation"]

                clusters = fcluster(Z[uri], threshold, criterion="distance")

                hypothesis = Annotation(uri=uri)
                for (start_time, end_time), cluster in zip(t[uri], clusters):
                    hypothesis[Segment(start_time, end_time)] = cluster

                _ = _metric(reference, hypothesis, uem=uem)

            return 1.0 - abs(_metric)

        res = scipy.optimize.minimize_scalar(fun,
                                             bounds=(0.0, 1.0),
                                             method="bounded",
                                             options={"maxiter": 10})

        threshold = res.x.item()

        return {
            "metric": "diarization_fscore",
            "minimize": False,
            "value": float(1.0 - res.fun),
        }

    def _validate_epoch_verification(
        self,
        epoch,
        validation_data,
        protocol=None,
        subset: Subset = "development",
        device: Optional[torch.device] = None,
        batch_size: int = 32,
        n_jobs: int = 1,
        duration: float = None,
        step: float = 0.25,
        metric: str = None,
        **kwargs,
    ):

        # initialize embedding extraction
        pretrained = Pretrained(
            validate_dir=self.validate_dir_,
            epoch=epoch,
            duration=duration,
            step=step,
            batch_size=batch_size,
            device=device,
        )

        preprocessors = self.preprocessors_
        if "audio" not in preprocessors:
            preprocessors["audio"] = FileFinder()
        if "duration" not in preprocessors:
            preprocessors["duration"] = get_audio_duration
        _protocol = get_protocol(protocol, preprocessors=preprocessors)

        y_true, y_pred, cache = [], [], {}

        for trial in getattr(_protocol, f"{subset}_trial")():

            # compute embedding for file1
            file1 = trial["file1"]
            hash1 = self.get_hash(file1)
            if hash1 in cache:
                emb1 = cache[hash1]
            else:
                emb1 = self.get_embedding(file1, pretrained)
                cache[hash1] = emb1

            # compute embedding for file2
            file2 = trial["file2"]
            hash2 = self.get_hash(file2)
            if hash2 in cache:
                emb2 = cache[hash2]
            else:
                emb2 = self.get_embedding(file2, pretrained)
                cache[hash2] = emb2

            # compare embeddings
            distance = cdist(emb1, emb2, metric=metric)[0, 0]
            y_pred.append(distance)

            y_true.append(trial["reference"])

        _, _, _, eer = det_curve(np.array(y_true),
                                 np.array(y_pred),
                                 distances=True)

        return {
            "metric": "equal_error_rate",
            "minimize": True,
            "value": float(eer)
        }
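
    # `get_hash` and `get_embedding` are used above but not defined in this
    # snippet. A plausible sketch, assuming every trial file carries a
    # "try_with" region over which its embedding should be averaged:
    @staticmethod
    def get_hash(trial_file):
        # one cache key per (file, trial region) pair
        return hash((get_unique_identifier(trial_file),
                     str(trial_file["try_with"])))

    @staticmethod
    def get_embedding(trial_file, pretrained):
        # average sliding-window embeddings over the trial region, keeping a
        # (1, dimension) shape so that cdist() above applies directly
        emb = pretrained(trial_file)
        return np.mean(emb.crop(trial_file["try_with"], mode="center"),
                       axis=0, keepdims=True)
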
    def validate_epoch(self,
                       epoch,
                       validation_data,
                       device=None,
                       batch_size=32,
                       n_jobs=1,
                       duration=None,
                       step=0.25,
                       **kwargs):

        # compute (and store) SAD scores
        pretrained = Pretrained(validate_dir=self.validate_dir_,
                                epoch=epoch,
                                duration=duration,
                                step=step,
                                batch_size=batch_size,
                                device=device)

        for current_file in validation_data:
            current_file['scores'] = pretrained(current_file)

        # pipeline
        pipeline = self.Pipeline(scores="@scores", fscore=True)

        def fun(threshold):
            # hysteresis thresholding: speech starts when the raw score rises
            # above `onset` and ends when it falls below `offset`; tying both
            # to a single value keeps the scalar search one-dimensional
            pipeline.instantiate({
                'onset': threshold,
                'offset': threshold,
                'min_duration_on': 0.100,
                'min_duration_off': 0.100,
                'pad_onset': 0.,
                'pad_offset': 0.
            })
            metric = pipeline.get_metric(parallel=True)
            validate = partial(validate_helper_func,
                               pipeline=pipeline,
                               metric=metric)
            if n_jobs > 1:
                _ = self.pool_.map(validate, validation_data)
            else:
                for file in validation_data:
                    _ = validate(file)

            return 1. - abs(metric)

        res = scipy.optimize.minimize_scalar(fun,
                                             bounds=(0., 1.),
                                             method='bounded',
                                             options={'maxiter': 10})

        threshold = res.x.item()

        return {
            'metric': self.validation_criterion(None),
            'minimize': False,
            'value': float(1. - res.fun),
            'pipeline': pipeline.instantiate({
                'onset': threshold,
                'offset': threshold,
                'min_duration_on': 0.100,
                'min_duration_off': 0.100,
                'pad_onset': 0.,
                'pad_offset': 0.
            })
        }
Example #10
def apply_pretrained(validate_dir: Path,
                     protocol_name: str,
                     subset: Optional[str] = "test",
                     duration: Optional[float] = None,
                     step: float = 0.25,
                     device: Optional[torch.device] = None,
                     batch_size: int = 32,
                     pretrained: Optional[str] = None,
                     Pipeline: type = None,
                     **kwargs):
    """Apply pre-trained model

    Parameters
    ----------
    validate_dir : Path
    protocol_name : `str`
    subset : 'train' | 'development' | 'test', optional
        Defaults to 'test'.
    duration : `float`, optional
    step : `float`, optional
    device : `torch.device`, optional
    batch_size : `int`, optional
    pretrained : `str`, optional
    Pipeline : `type`
    """

    if pretrained is None:
        pretrained = Pretrained(validate_dir=validate_dir,
                                duration=duration,
                                step=step,
                                batch_size=batch_size,
                                device=device)
        output_dir = validate_dir / 'apply' / f'{pretrained.epoch_:04d}'
    else:

        if pretrained in torch.hub.list('pyannote/pyannote-audio'):
            output_dir = validate_dir / pretrained
        else:
            output_dir = validate_dir

        pretrained = Wrapper(pretrained,
                             duration=duration,
                             step=step,
                             batch_size=batch_size,
                             device=device)

    params = {}
    try:
        params['classes'] = pretrained.classes
    except AttributeError:
        pass
    try:
        params['dimension'] = pretrained.dimension
    except AttributeError:
        pass

    # create metadata file at root that contains
    # sliding window and dimension information
    precomputed = Precomputed(root_dir=output_dir,
                              sliding_window=pretrained.sliding_window,
                              **params)

    # file generator
    protocol = get_protocol(protocol_name,
                            progress=True,
                            preprocessors=pretrained.preprocessors_)

    for current_file in getattr(protocol, subset)():
        fX = pretrained(current_file)
        precomputed.dump(current_file, fX)

    # do not proceed with the full pipeline
    # when there is no such thing for current task
    if Pipeline is None:
        return

    # do not proceed with the full pipeline when its parameters cannot be loaded.
    # this might happen when applying a model that has not been validated yet
    try:
        pipeline_params = pretrained.pipeline_params_
    except AttributeError:
        return

    # instantiate pipeline
    pipeline = Pipeline(scores=output_dir)
    pipeline.instantiate(pipeline_params)

    # load pipeline metric (when available)
    try:
        metric = pipeline.get_metric()
    except NotImplementedError:
        metric = None

    # apply pipeline and dump output to RTTM files
    output_rttm = output_dir / f'{protocol_name}.{subset}.rttm'
    with open(output_rttm, 'w') as fp:
        for current_file in getattr(protocol, subset)():
            hypothesis = pipeline(current_file)
            pipeline.write_rttm(fp, hypothesis)

            # compute evaluation metric (when possible)
            if 'annotation' not in current_file:
                metric = None

            # compute evaluation metric (when available)
            if metric is None:
                continue

            reference = current_file['annotation']
            uem = get_annotated(current_file)
            _ = metric(reference, hypothesis, uem=uem)

    # print pipeline metric (when available)
    if metric is None:
        return

    output_eval = output_dir / f'{protocol_name}.{subset}.eval'
    with open(output_eval, 'w') as fp:
        fp.write(str(metric))
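
# Usage sketch (hypothetical paths; the protocol name mirrors the earlier
# snippets):
if __name__ == "__main__":
    apply_pretrained(
        Path("/path/to/train/validate_equal_error_rate/development"),
        "ZALODATASET.SpeakerVerification.BenProtocol",
        subset="test",
        batch_size=32,
        device=torch.device("cpu"),
    )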
Example #11
    def __init__(self, wrappable: Wrappable, **params):
        super().__init__()

        from pyannote.audio.features import Pretrained
        from pyannote.audio.features import Precomputed
        from pyannote.audio.features import FeatureExtraction
        from pyannote.audio.features import RawAudio

        scorer = None
        msg = ""

        # corner case: a {wrappable: params} dict provides custom parameters
        if isinstance(wrappable, dict):
            wrappable, custom_params = dict(wrappable).popitem()
            params.update(**custom_params)

        # If `wrappable` already complies with the `FeatureExtraction` API, it
        # is kept unchanged. This includes instances of any `FeatureExtraction`
        # subclass, `RawAudio` instances, `Precomputed` instances, and
        # `Pretrained` instances.
        if isinstance(wrappable,
                      (FeatureExtraction, RawAudio, Pretrained, Precomputed)):
            scorer = wrappable

        elif Path(wrappable).is_dir():
            directory = Path(wrappable)

            # If `wrappable` is a `Path` to a directory containing precomputed
            # features or scores, wrap the corresponding `Precomputed` instance
            try:
                scorer = Precomputed(root_dir=directory)
            except Exception:
                scorer = None

            # If `wrappable` is a `Path` to a validation directory,
            # wrap the corresponding `Pretrained` instance
            if scorer is None:
                try:
                    scorer = Pretrained(validate_dir=directory, **params)
                except Exception:
                    scorer = None

            if scorer is None:
                msg = (f'"{wrappable}" directory does not seem to be the path '
                       f"to precomputed features nor the path to a model "
                       f"validation step.")

        # If `wrappable` is a `Path` to a pretrained model checkpoint,
        # wrap the corresponding `Pretrained` instance
        elif Path(wrappable).is_file():
            checkpoint = Path(wrappable)

            try:
                validate_dir = checkpoint.parents[1] / "validate" / "fake"
                epoch = int(checkpoint.stem)
                scorer = Pretrained(validate_dir=validate_dir,
                                    epoch=epoch,
                                    **params)
            except Exception:
                msg = (f'"{wrappable}" file does not seem to be the path '
                       f"to a pretrained model checkpoint.")
                scorer = None

        elif isinstance(wrappable, Text):

            # If `wrappable` is a `Text` starting with '@' such as '@key',
            # it means that one should read the "key" key of protocol files
            if wrappable.startswith("@"):
                key = wrappable[1:]

                scorer = partial(_use_existing_key, key)
                # scorer = lambda current_file: current_file[key]

            # If `wrappable` is a `Text` containing the name of an existing
            # `torch.hub` model, wrap the corresponding `Pretrained`.
            else:
                try:
                    import torch

                    scorer = torch.hub.load("pyannote/pyannote-audio",
                                            wrappable, **params)
                    if not isinstance(scorer, Pretrained):
                        msg = (
                            f'"{wrappable}" exists on torch.hub but does not '
                            f"return a `Pretrained` model instance.")
                        scorer = None

                except Exception as e:
                    msg = (f"Could not load {wrappable} model from torch.hub. "
                           f"The following exception was raised:\n{e}")
                    scorer = None

        # warn the user that something went wrong
        if scorer is None:
            raise ValueError(msg)

        self.scorer_ = scorer
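
    # Usage sketch for the dispatch above (hypothetical inputs): every form
    # below resolves to the same kind of `FeatureExtraction`-style scorer.
    #
    #   Wrapper(Pretrained(validate_dir=..., epoch=19))  # already compliant
    #   Wrapper("/path/to/validate_dir")                 # validation directory
    #   Wrapper("/path/to/train/weights/0019.pt")        # model checkpoint
    #   Wrapper("@scores")                               # key in protocol files
    #   Wrapper("emb_voxceleb")                          # torch.hub model name
    #   Wrapper({"emb_voxceleb": {"step": 0.1}})         # with custom params
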
    def _validate_epoch_verification(self,
                                     epoch,
                                     validation_data,
                                     protocol=None,
                                     subset='development',
                                     device: Optional[torch.device] = None,
                                     batch_size: int = 32,
                                     n_jobs: int = 1,
                                     duration: float = None,
                                     step: float = 0.25,
                                     metric: str = None,
                                     **kwargs):

        # initialize embedding extraction
        pretrained = Pretrained(validate_dir=self.validate_dir_,
                                epoch=epoch,
                                duration=duration,
                                step=step,
                                batch_size=batch_size,
                                device=device)

        _protocol = get_protocol(protocol,
                                 progress=False,
                                 preprocessors=self.preprocessors_)

        y_true, y_pred, cache = [], [], {}

        for trial in getattr(_protocol, '{0}_trial'.format(subset))():

            # compute embedding for file1
            file1 = trial['file1']
            hash1 = self.get_hash(file1)
            if hash1 in cache:
                emb1 = cache[hash1]
            else:
                emb1 = self.get_embedding(file1, pretrained)
                cache[hash1] = emb1

            # compute embedding for file2
            file2 = trial['file2']
            hash2 = self.get_hash(file2)
            if hash2 in cache:
                emb2 = cache[hash2]
            else:
                emb2 = self.get_embedding(file2, pretrained)
                cache[hash2] = emb2

            # compare embeddings
            distance = cdist(emb1, emb2, metric=metric)[0, 0]
            y_pred.append(distance)

            y_true.append(trial['reference'])

        _, _, _, eer = det_curve(np.array(y_true),
                                 np.array(y_pred),
                                 distances=True)

        return {
            'metric': 'equal_error_rate',
            'minimize': True,
            'value': float(eer)
        }
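
# A tiny sketch of the det_curve call above, assuming pyannote.metrics'
# binary-classification helper (distances=True means lower score = same
# speaker):
from pyannote.metrics.binary_classification import det_curve
import numpy as np

y_true = np.array([True, True, False, False])  # same-speaker trials first
y_pred = np.array([0.2, 0.3, 0.9, 0.8])        # cosine distances per trial
fpr, fnr, thresholds, eer = det_curve(y_true, y_pred, distances=True)
# eer: operating point where false positive and false negative rates are equal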