Python Pretrained示例，pyannote.audio.features.Pretrained Python示例

示例#1

0

显示文件

    def __init__(self):
        """Build the `Pretrained` engine (CPU, epoch 31) and an empty embedding map.

        The validation directory is hard-coded; `filename2embedding` starts
        out as an empty dict.
        """
        model_dir = '/home/src/model_files/ZALODATASET.SpeakerVerification.BenProtocol.train/validate_equal_error_rate/ZALODATASET.SpeakerVerification.BenProtocol.development/'
        self.engine = Pretrained(validate_dir=model_dir, epoch=31, device="cpu")

        self.filename2embedding = dict()

示例#2

0

显示文件

文件： change_detection.py 项目： zhangpengpengpeng/pyannote-audio

    def validate_epoch(
        self,
        epoch,
        validation_data,
        device=None,
        batch_size=32,
        diarization=False,
        n_jobs=1,
        duration=None,
        step=0.25,
        **kwargs
    ):
        """Tune the pipeline `alpha` threshold for the model saved at `epoch`.

        Scores are computed with a `Pretrained` model and stored under the
        "scores" key of every validation file. A bounded scalar search over
        [0, 1] (at most 10 iterations) then looks for the `alpha` that
        minimizes `1 - abs(metric)`.

        Returns
        -------
        dict
            "metric" (validation criterion), "minimize" (always False),
            "value" (`1 - res.fun` as a float) and "pipeline" (the result of
            instantiating the pipeline with the selected threshold).
        """

        def hyper_parameters(alpha):
            # `min_duration` is kept fixed; only `alpha` is searched.
            return {"alpha": alpha, "min_duration": 0.100}

        # run the model once per file and keep its output on the file itself
        scorer = Pretrained(
            validate_dir=self.validate_dir_,
            epoch=epoch,
            duration=duration,
            step=step,
            batch_size=batch_size,
            device=device,
        )
        for item in validation_data:
            item["scores"] = scorer(item)

        # the pipeline reads the scores back through the "@scores" key
        pipeline = self.Pipeline(scores="@scores", fscore=True, diarization=diarization)

        def objective(alpha):
            pipeline.instantiate(hyper_parameters(alpha))
            metric = pipeline.get_metric(parallel=True)
            evaluate = partial(validate_helper_func, pipeline=pipeline, metric=metric)
            if n_jobs > 1:
                self.pool_.map(evaluate, validation_data)
            else:
                for item in validation_data:
                    evaluate(item)

            return 1.0 - abs(metric)

        search = scipy.optimize.minimize_scalar(
            objective, bounds=(0.0, 1.0), method="bounded", options={"maxiter": 10}
        )
        best_alpha = search.x.item()

        return {
            "metric": self.validation_criterion(None, diarization=diarization),
            "minimize": False,
            "value": float(1.0 - search.fun),
            "pipeline": pipeline.instantiate(hyper_parameters(best_alpha)),
        }

示例#3

0

显示文件

    def __init__(self, weights_path: Path = None, step: float = 0.0333):
        """Wrap a `Pretrained` model loaded from `weights_path`.

        Parameters
        ----------
        weights_path : str, bytes or os.PathLike
            Location handed to `Pretrained` after conversion to `Path`.
            The default (`None`) is not convertible and therefore raises.
        step : float, optional
            Forwarded to `Pretrained` as `step`. Defaults to 0.0333.

        Raises
        ------
        TypeError
            If `weights_path` cannot be converted to a `Path`.
        """

        try:
            weights_path = Path(weights_path)
        except TypeError as e:
            msg = (
                f'"weights_path" must be str, bytes, or os.PathLike object, not {type(weights_path).__name__}.'
            )
            # chain explicitly so the traceback shows the original failure
            # as the direct cause of this friendlier error
            raise TypeError(msg) from e

        self._model = Pretrained(weights_path, step=step)

示例#4

0

显示文件

文件： domain_classification.py 项目： zhiqizhang/pyannote-audio

    def validate_epoch(self,
                       epoch,
                       validation_data,
                       device=None,
                       batch_size=32,
                       n_jobs=1,
                       duration=None,
                       step=0.25,
                       **kwargs):
        """Compute file-level classification accuracy for the model at `epoch`.

        For each validation file the per-row argmax of the model output is
        taken and the most frequent class index is kept as the file-level
        prediction. It is compared with the index, in `pretrained.classes`,
        of the value stored in the file under the task's `domain` key.

        Returns
        -------
        dict
            "metric" ('accuracy'), "minimize" (False) and "value" (the mean
            of the per-file matches, as a float).
        """

        model = Pretrained(validate_dir=self.validate_dir_,
                           epoch=epoch,
                           duration=duration,
                           step=step,
                           batch_size=batch_size,
                           device=device)

        domain_key = self.task_.domain
        known_domains = model.classes

        expected = []
        predicted = []
        for item in validation_data:
            # majority vote over the per-row winning class
            votes = Counter(model(item).data.argmax(axis=1))
            predicted.append(votes.most_common(1)[0][0])

            # ground truth expressed as an index into the model classes
            expected.append(known_domains.index(item[domain_key]))

        matches = np.array(expected) == np.array(predicted)
        accuracy = np.mean(matches)

        return {
            'metric': 'accuracy',
            'minimize': False,
            'value': float(accuracy)
        }

示例#5

0

显示文件

文件： voxceleb_19epochfinetune.py 项目： bml1g12/zalo-2020-challenge-voice-verification

            continue
        ratio = result.loc[True] / result.loc[False]
        minimise_metric = abs((ratio) - 1)
        rows.append({
            "metric": minimise_metric,
            "threshold": i,
            "ratio": ratio
        })
    _ = pd.DataFrame(rows)
    return _.sort_values("metric").iloc[0]


# Name of the experiment; also used as the prefix of the pickle file read below.
expt_name = "embeddings_voxceleb_19epochfinetuned"
# Embedding extractor loaded from a hard-coded validation directory,
# epoch 19, on the "cuda" device.
emb = Pretrained(
    validate_dir=
    '/media/ben/datadrive/Software/pyannote-audio/data/ami/voxceleb_finetuneexp2_loose/train/ZALODATASET.SpeakerVerification.BenProtocol.train/validate_equal_error_rate/ZALODATASET.SpeakerVerification.BenProtocol.development/',
    epoch=19,
    device="cuda")

# NOTE(review): this environment variable is set only after `Pretrained` has
# been constructed -- confirm that the model loading above does not need it.
os.environ[
    "PYANNOTE_DATABASE_CONFIG"] = "/media/ben/datadrive/Software/pyannote-audio/data/ami/"
# Disabled alternative: speaker embedding model loaded from torch.hub
# (original note: "trained on AMI training set").
# emb = torch.hub.load('pyannote/pyannote-audio', 'emb_voxceleb')
expt_root = "/media/ben/datadrive/Zalo/voice-verification/"
# Absolute path of the public test CSV, read into a DataFrame.
dataset_path = os.path.abspath(
    os.path.join(expt_root, "Train-Test-Data/public-test.csv"))
df_test_sub = pd.read_csv(dataset_path)

# The empty dict is immediately replaced by the unpickled object.
# NOTE(review): unpickling can execute arbitrary code -- only load pickle
# files produced by a trusted run of this project.
filename2embedding = {}
with open(f"{expt_name}embedding_public.pickle", "rb") as input_file:
    filename2embedding = pickle.load(input_file)

示例#6

0

显示文件

from tqdm import tqdm
import torch
from pathlib import Path
import numpy as np
from tqdm import tqdm
import pandas as pd
from pyannote.audio.features import Pretrained
from pyannote.core.utils.distance import l2_normalize
from pyannote.core.utils.distance import cdist
import os

# Decision threshold; per the original note it was taken from the validation set.
threshold = 0.792  # from val set
expt_name = "embeddings_voxceleb_20epochfinetuned"
# Embedding extractor loaded from a hard-coded validation directory, epoch 20.
# No `device` is passed here, so `Pretrained` uses whatever its default is.
emb = Pretrained(
    validate_dir=
    '/media/ben/datadrive/Software/pyannote-audio/data/ami/tmp2/train/ZALODATASET.SpeakerVerification.MixHeadset.train/validate_equal_error_rate/ZALODATASET.SpeakerVerification.MixHeadset.val/',
    epoch=20)

# NOTE(review): this environment variable is set only after `Pretrained` has
# been constructed -- confirm that the model loading above does not need it.
os.environ[
    "PYANNOTE_DATABASE_CONFIG"] = "/media/ben/datadrive/Software/pyannote-audio/data/ami/"
# Disabled alternative: speaker embedding model loaded from torch.hub
# (original note: "trained on AMI training set").
# emb = torch.hub.load('pyannote/pyannote-audio', 'emb_voxceleb')
expt_root = "/media/ben/datadrive/Zalo/voice-verification/"
# Absolute path of the public test CSV, read into a DataFrame.
dataset_path = os.path.abspath(
    os.path.join(expt_root, "Train-Test-Data/public-test.csv"))
df_test_sub = pd.read_csv(dataset_path)
with tqdm(total=len(df_test_sub)) as pbar:
    for i, row in df_test_sub.iterrows():
        audio1_embedding = np.mean(emb({
            "uri":
            row["audio_1"],

示例#7

0

显示文件

文件： speaker_embedding.py 项目： zhangpengpengpeng/pyannote-audio

    def _validate_epoch_diarization(
        self,
        epoch,
        validation_data,
        protocol=None,
        subset: Subset = "development",
        device: Optional[torch.device] = None,
        batch_size: int = 32,
        n_jobs: int = 1,
        duration: float = None,
        step: float = 0.25,
        metric: str = None,
        **kwargs,
    ):
        """Validate the embedding model at `epoch` on a diarization task.

        For every file of `protocol`'s `subset`, each reference speech turn
        is represented by the average of the embeddings cropped on that turn,
        and a linkage matrix is built from those averages. A bounded scalar
        search over the clustering threshold (bounds (0, 1), at most 10
        iterations) then minimizes `1 - abs(DiarizationPurityCoverageFMeasure)`.

        `validation_data`, `n_jobs` and `**kwargs` are accepted but not used
        in this method body.

        Returns
        -------
        dict
            "metric" ("diarization_fscore"), "minimize" (False) and
            "value" (`1 - res.fun` as a float).

        Raises
        ------
        ValueError
            If a speech turn yields no embedding in either "center" or
            "loose" cropping mode.
        """

        # initialize embedding extraction
        pretrained = Pretrained(
            validate_dir=self.validate_dir_,
            epoch=epoch,
            duration=duration,
            step=step,
            batch_size=batch_size,
            device=device,
        )

        # make sure "audio" and "duration" preprocessors exist
        # (this adds the missing entries to `self.preprocessors_` itself)
        preprocessors = self.preprocessors_
        if "audio" not in preprocessors:
            preprocessors["audio"] = FileFinder()
        if "duration" not in preprocessors:
            preprocessors["duration"] = get_audio_duration
        _protocol = get_protocol(protocol, preprocessors=preprocessors)

        # per-file linkage matrices (Z) and speech turns (t), keyed by uri
        Z, t = dict(), dict()
        # NOTE(review): min_d / max_d are tracked below but never read
        # afterwards; the search bounds are fixed to (0.0, 1.0).
        min_d, max_d = np.inf, -np.inf

        for current_file in getattr(_protocol, subset)():

            uri = get_unique_identifier(current_file)
            # NOTE(review): `uem` is not used in this first loop.
            uem = get_annotated(current_file)
            reference = current_file["annotation"]

            X_, t_ = [], []
            embedding = pretrained(current_file)
            # NOTE(review): the enumerate index `i` is not used.
            for i, (turn, _) in enumerate(reference.itertracks()):

                # extract embedding for current speech turn
                # (fall back to "loose" mode when "center" returns nothing)
                x_ = embedding.crop(turn, mode="center")
                if len(x_) < 1:
                    x_ = embedding.crop(turn, mode="loose")
                if len(x_) < 1:
                    msg = f"No embedding for {turn} in {uri:s}."
                    raise ValueError(msg)

                # each speech turn is represented by its average embedding
                X_.append(np.mean(x_, axis=0))
                t_.append(turn)

            X_ = np.array(X_)
            # apply hierarchical agglomerative clustering
            # all the way up to just one cluster (ie complete dendrogram)
            D = pdist(X_, metric=metric)
            min_d = min(np.min(D), min_d)
            max_d = max(np.max(D), max_d)

            Z[uri] = linkage(X_, method="pool", metric=metric)
            t[uri] = np.array(t_)

        def fun(threshold):
            # objective for the scalar search: 1 - |f-measure| obtained when
            # cutting every file's dendrogram at `threshold`

            _metric = DiarizationPurityCoverageFMeasure(weighted=False)

            for current_file in getattr(_protocol, subset)():

                uri = get_unique_identifier(current_file)
                uem = get_annotated(current_file)
                reference = current_file["annotation"]

                clusters = fcluster(Z[uri], threshold, criterion="distance")

                # label each stored speech turn with its cluster
                hypothesis = Annotation(uri=uri)
                for (start_time, end_time), cluster in zip(t[uri], clusters):
                    hypothesis[Segment(start_time, end_time)] = cluster

                # accumulate this file into the metric
                _ = _metric(reference, hypothesis, uem=uem)

            return 1.0 - abs(_metric)

        res = scipy.optimize.minimize_scalar(fun,
                                             bounds=(0.0, 1.0),
                                             method="bounded",
                                             options={"maxiter": 10})

        # NOTE(review): the selected threshold is computed but not returned.
        threshold = res.x.item()

        return {
            "metric": "diarization_fscore",
            "minimize": False,
            "value": float(1.0 - res.fun),
        }

示例#8

0

显示文件

文件： speaker_embedding.py 项目： zhangpengpengpeng/pyannote-audio

    def _validate_epoch_verification(
        self,
        epoch,
        validation_data,
        protocol=None,
        subset: Subset = "development",
        device: Optional[torch.device] = None,
        batch_size: int = 32,
        n_jobs: int = 1,
        duration: float = None,
        step: float = 0.25,
        metric: str = None,
        **kwargs,
    ):
        """Compute the equal error rate of the model at `epoch` on trials.

        Every trial of `protocol`'s `{subset}_trial` method contributes the
        distance between the embeddings of its "file1" and "file2" entries,
        together with its "reference" label. Embeddings are computed once per
        file hash and reused across trials.

        Returns
        -------
        dict
            "metric" ("equal_error_rate"), "minimize" (True) and "value"
            (the equal error rate as a float).
        """

        # embedding extractor for the requested epoch
        extractor = Pretrained(
            validate_dir=self.validate_dir_,
            epoch=epoch,
            duration=duration,
            step=step,
            batch_size=batch_size,
            device=device,
        )

        # fill in the preprocessors the protocol needs when they are missing
        preprocessors = self.preprocessors_
        if "audio" not in preprocessors:
            preprocessors["audio"] = FileFinder()
        if "duration" not in preprocessors:
            preprocessors["duration"] = get_audio_duration
        _protocol = get_protocol(protocol, preprocessors=preprocessors)

        known = {}

        def embed(file):
            # embeddings are memoized on the file hash
            key = self.get_hash(file)
            if key not in known:
                known[key] = self.get_embedding(file, extractor)
            return known[key]

        labels, distances = [], []
        trials = getattr(_protocol, f"{subset}_trial")
        for trial in trials():
            first = embed(trial["file1"])
            second = embed(trial["file2"])

            # a single distance between the two embeddings
            distances.append(cdist(first, second, metric=metric)[0, 0])
            labels.append(trial["reference"])

        _, _, _, eer = det_curve(np.array(labels),
                                 np.array(distances),
                                 distances=True)

        return {
            "metric": "equal_error_rate",
            "minimize": True,
            "value": float(eer)
        }

示例#9

0

显示文件

文件： speech_detection.py 项目： zhiqizhang/pyannote-audio

    def validate_epoch(self,
                       epoch,
                       validation_data,
                       device=None,
                       batch_size=32,
                       n_jobs=1,
                       duration=None,
                       step=0.25,
                       **kwargs):
        """Tune the onset/offset threshold for the model saved at `epoch`.

        Scores are computed with a `Pretrained` model and stored under the
        'scores' key of every validation file. A bounded scalar search over
        [0, 1] (at most 10 iterations) then looks for the single value, used
        for both 'onset' and 'offset', that minimizes `1 - abs(metric)`.

        Returns
        -------
        dict
            'metric' (validation criterion), 'minimize' (always False),
            'value' (`1 - res.fun` as a float) and 'pipeline' (the result of
            instantiating the pipeline with the selected threshold).
        """

        def hyper_parameters(value):
            # only the shared onset/offset value varies; the rest is fixed
            return {
                'onset': value,
                'offset': value,
                'min_duration_on': 0.100,
                'min_duration_off': 0.100,
                'pad_onset': 0.,
                'pad_offset': 0.
            }

        # run the model once per file and keep its output on the file itself
        scorer = Pretrained(validate_dir=self.validate_dir_,
                            epoch=epoch,
                            duration=duration,
                            step=step,
                            batch_size=batch_size,
                            device=device)
        for item in validation_data:
            item['scores'] = scorer(item)

        # the pipeline reads the scores back through the "@scores" key
        pipeline = self.Pipeline(scores="@scores", fscore=True)

        def objective(value):
            pipeline.instantiate(hyper_parameters(value))
            metric = pipeline.get_metric(parallel=True)
            evaluate = partial(validate_helper_func,
                               pipeline=pipeline,
                               metric=metric)
            if n_jobs > 1:
                self.pool_.map(evaluate, validation_data)
            else:
                for item in validation_data:
                    evaluate(item)

            return 1. - abs(metric)

        search = scipy.optimize.minimize_scalar(objective,
                                                bounds=(0., 1.),
                                                method='bounded',
                                                options={'maxiter': 10})
        best = search.x.item()

        return {
            'metric': self.validation_criterion(None),
            'minimize': False,
            'value': float(1. - search.fun),
            'pipeline': pipeline.instantiate(hyper_parameters(best))
        }

示例#10

0

显示文件

def apply_pretrained(validate_dir: Path,
                     protocol_name: str,
                     subset: Optional[str] = "test",
                     duration: Optional[float] = None,
                     step: float = 0.25,
                     device: Optional[torch.device] = None,
                     batch_size: int = 32,
                     pretrained: Optional[str] = None,
                     Pipeline: type = None,
                     **kwargs):
    """Run a pre-trained model over a protocol subset and dump its output.

    The model output is written for every file of the subset. When a
    `Pipeline` class is given and the model exposes `pipeline_params_`,
    the pipeline is also applied: hypotheses go to an RTTM file and, if a
    metric is available and every file has an 'annotation', the metric is
    written to an '.eval' file.

    Parameters
    ----------
    validate_dir : Path
        Validation directory; output is written below it.
    protocol_name : `str`
    subset : 'train' | 'development' | 'test', optional
        Defaults to 'test'.
    duration : `float`, optional
    step : `float`, optional
    device : `torch.device`, optional
    batch_size : `int`, optional
    pretrained : `str`, optional
        When omitted, the model is loaded from `validate_dir`; otherwise the
        value is handed to `Wrapper`.
    Pipeline : `type`
    """

    extraction = dict(duration=duration,
                      step=step,
                      batch_size=batch_size,
                      device=device)

    if pretrained is None:
        pretrained = Pretrained(validate_dir=validate_dir, **extraction)
        output_dir = validate_dir / 'apply' / f'{pretrained.epoch_:04d}'
    else:
        # torch.hub models get their own sub-directory
        on_hub = pretrained in torch.hub.list('pyannote/pyannote-audio')
        output_dir = validate_dir / pretrained if on_hub else validate_dir
        pretrained = Wrapper(pretrained, **extraction)

    # optional metadata: only forwarded when the model exposes it
    params = {}
    for attribute in ('classes', 'dimension'):
        try:
            params[attribute] = getattr(pretrained, attribute)
        except AttributeError:
            continue

    # create metadata file at root that contains
    # sliding window and dimension information
    precomputed = Precomputed(root_dir=output_dir,
                              sliding_window=pretrained.sliding_window,
                              **params)

    protocol = get_protocol(protocol_name,
                            progress=True,
                            preprocessors=pretrained.preprocessors_)
    files = getattr(protocol, subset)

    for current_file in files():
        precomputed.dump(current_file, pretrained(current_file))

    # without a pipeline class there is nothing more to do
    if Pipeline is None:
        return

    # a model that has not been validated yet has no pipeline parameters
    try:
        pipeline_params = pretrained.pipeline_params_
    except AttributeError:
        return

    pipeline = Pipeline(scores=output_dir)
    pipeline.instantiate(pipeline_params)

    # the metric is optional: some pipelines do not implement one
    try:
        metric = pipeline.get_metric()
    except NotImplementedError:
        metric = None

    # apply pipeline and dump output to RTTM files
    output_rttm = output_dir / f'{protocol_name}.{subset}.rttm'
    with open(output_rttm, 'w') as fp:
        for current_file in files():
            hypothesis = pipeline(current_file)
            pipeline.write_rttm(fp, hypothesis)

            # one file without reference disables evaluation for good
            if 'annotation' not in current_file:
                metric = None

            if metric is not None:
                metric(current_file['annotation'],
                       hypothesis,
                       uem=get_annotated(current_file))

    # write the pipeline metric (when available)
    if metric is not None:
        output_eval = output_dir / f'{protocol_name}.{subset}.eval'
        with open(output_eval, 'w') as fp:
            fp.write(str(metric))

示例#11

0

显示文件

    def __init__(self, wrappable: Wrappable, **params):
        """Resolve `wrappable` into a scorer stored as `self.scorer_`.

        Supported forms, tried in this order:

        * a single-item `dict` `{wrappable: custom_params}`: the value is
          merged into `params` and the key is resolved as below;
        * a `FeatureExtraction`, `RawAudio`, `Pretrained` or `Precomputed`
          instance: used as is;
        * an existing directory: tried as `Precomputed(root_dir=...)`, then
          as `Pretrained(validate_dir=...)`;
        * an existing file: treated as a checkpoint named after its epoch;
        * a string starting with "@": the scorer reads that key from the
          file it is given;
        * any other string: loaded from torch.hub.

        Parameters
        ----------
        wrappable : Wrappable
            Object to resolve (see above).
        **params
            Extra keyword arguments forwarded to `Pretrained` or
            `torch.hub.load`.

        Raises
        ------
        ValueError
            If `wrappable` could not be resolved into a scorer.
        """
        super().__init__()

        from pyannote.audio.features import Pretrained
        from pyannote.audio.features import Precomputed
        from pyannote.audio.features import FeatureExtraction
        from pyannote.audio.features import RawAudio

        scorer = None
        msg = ""

        # corner case: {wrappable: custom_params} dictionary
        # (copied first so that the caller's dict is left untouched)
        if isinstance(wrappable, dict):
            wrappable, custom_params = dict(wrappable).popitem()
            params.update(**custom_params)

        # If `wrappable` already complies with the `FeatureExtraction` API , it
        # is kept unchanged. This includes instances of any `FeatureExtraction`
        # subclass,`RawAudio` instances, `Precomputed` instances, and
        # `Pretrained` instances.
        if isinstance(wrappable,
                      (FeatureExtraction, RawAudio, Pretrained, Precomputed)):
            scorer = wrappable

        elif Path(wrappable).is_dir():
            directory = Path(wrappable)

            # If `wrappable` is a `Path` to a directory containing precomputed
            # features or scores, wrap the corresponding `Precomputed` instance
            # (best effort: any failure falls through to the next attempt)
            try:
                scorer = Precomputed(root_dir=directory)
            except Exception:
                scorer = None

            # If `wrappable` is a `Path` to a validation directory,
            # wrap the corresponding `Pretrained` instance
            if scorer is None:
                try:
                    scorer = Pretrained(validate_dir=directory, **params)
                except Exception:
                    scorer = None

            if scorer is None:
                msg = (f'"{wrappable}" directory does not seem to be the path '
                       f"to precomputed features nor the path to a model "
                       f"validation step.")

        # If `wrappable` is a `Path` to a pretrained model checkpoint,
        # wrap the corresponding `Pretrained` instance
        elif Path(wrappable).is_file():
            checkpoint = Path(wrappable)

            try:
                # the checkpoint stem is the epoch number; the validation
                # directory is a placeholder two levels above the file
                validate_dir = checkpoint.parents[1] / "validate" / "fake"
                epoch = int(checkpoint.stem)
                scorer = Pretrained(validate_dir=validate_dir,
                                    epoch=epoch,
                                    **params)
            except Exception:
                # this branch handles a file, so the message says "file"
                msg = (f'"{wrappable}" file does not seem to be the path '
                       f"to a pretrained model checkpoint.")
                scorer = None

        elif isinstance(wrappable, Text):

            # If `wrappable` is a `Text` starting with '@' such as '@key',
            # it means that one should read the "key" key of protocol files
            if wrappable.startswith("@"):
                key = wrappable[1:]

                scorer = partial(_use_existing_key, key)

            # If `wrappable` is a `Text` containing the name of an existing
            # `torch.hub` model, wrap the corresponding `Pretrained`.
            else:
                try:
                    import torch

                    scorer = torch.hub.load("pyannote/pyannote-audio",
                                            wrappable, **params)
                    if not isinstance(scorer, Pretrained):
                        msg = (
                            f'"{wrappable}" exists on torch.hub but does not '
                            f"return a `Pretrained` model instance.")
                        scorer = None

                except Exception as e:
                    msg = (f"Could not load {wrappable} model from torch.hub. "
                           f"The following exception was raised:\n{e}")
                    scorer = None

        else:
            # e.g. a path-like object that does not exist on disk: without
            # this branch the error below would carry an empty message
            msg = (f'"{wrappable}" is neither an existing directory or file, '
                   f'nor a string naming a torch.hub model or an "@key".')

        # tell the user that something went wrong
        if scorer is None:
            raise ValueError(msg)

        self.scorer_ = scorer

示例#12

0

显示文件

文件： speaker_embedding.py 项目： zhiqizhang/pyannote-audio

    def _validate_epoch_verification(self,
                                     epoch,
                                     validation_data,
                                     protocol=None,
                                     subset='development',
                                     device: Optional[torch.device] = None,
                                     batch_size: int = 32,
                                     n_jobs: int = 1,
                                     duration: float = None,
                                     step: float = 0.25,
                                     metric: str = None,
                                     **kwargs):
        """Compute the equal error rate of the model at `epoch` on trials.

        Each trial returned by `protocol`'s `{subset}_trial` method yields
        one distance (between the embeddings of 'file1' and 'file2') and one
        'reference' label. An embedding is extracted at most once per file
        hash.

        Returns
        -------
        dict
            'metric' ('equal_error_rate'), 'minimize' (True) and 'value'
            (the equal error rate as a float).
        """

        # embedding extractor for the requested epoch
        extractor = Pretrained(validate_dir=self.validate_dir_,
                               epoch=epoch,
                               duration=duration,
                               step=step,
                               batch_size=batch_size,
                               device=device)

        _protocol = get_protocol(protocol,
                                 progress=False,
                                 preprocessors=self.preprocessors_)

        seen = {}

        def lookup(file):
            # reuse the embedding of a file already met in an earlier trial
            digest = self.get_hash(file)
            if digest in seen:
                return seen[digest]
            vector = self.get_embedding(file, extractor)
            seen[digest] = vector
            return vector

        references = []
        scores = []
        trial_method = getattr(_protocol, '{0}_trial'.format(subset))
        for trial in trial_method():
            left = lookup(trial['file1'])
            right = lookup(trial['file2'])

            # a single distance between the two embeddings
            pairwise = cdist(left, right, metric=metric)
            scores.append(pairwise[0, 0])

            references.append(trial['reference'])

        _, _, _, eer = det_curve(np.array(references),
                                 np.array(scores),
                                 distances=True)

        return {
            'metric': 'equal_error_rate',
            'minimize': True,
            'value': float(eer)
        }