class EsterSpeakerDiarizationProtocol(SpeakerDiarizationProtocol):
    """Base speaker diarization protocol for ESTER database

    Parameters
    ----------
    preprocessors : dict, optional
        Forwarded as-is to `SpeakerDiarizationProtocol`. When omitted, a new
        empty dict is created for each instance.
    **kwargs
        Forwarded as-is to `SpeakerDiarizationProtocol`.
    """

    def __init__(self, preprocessors=None, **kwargs):
        # A `{}` default would be created once and shared by every instance;
        # build a fresh dict per call instead.
        if preprocessors is None:
            preprocessors = {}
        super(EsterSpeakerDiarizationProtocol, self).__init__(
            preprocessors=preprocessors, **kwargs)
        self.uem_parser_ = UEMParser()
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):
        """Yield one dictionary per file of the given protocol subset

        Reads `data/{protocol}.{subset}.uem` and `data/{protocol}.{subset}.mdtm`
        from the `data` directory located next to this module.

        Parameters
        ----------
        protocol : str
            First component of the data file names.
        subset : str
            Second component of the data file names.

        Yields
        ------
        current_file : dict
            Dictionary with keys 'database', 'uri', 'annotated' and
            'annotation', in sorted order of the URIs found in the UEM file.
        """
        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotated parts
        # e.g. /data/{tv|radio|all}.{train|dev|test}.uem
        path = op.join(
            data_dir,
            '{protocol}.{subset}.uem'.format(subset=subset, protocol=protocol))
        uems = self.uem_parser_.read(path)

        # load annotations
        path = op.join(
            data_dir,
            '{protocol}.{subset}.mdtm'.format(subset=subset, protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        # the UEM file (not the MDTM file) decides which files are yielded
        for uri in sorted(uems.uris):
            annotated = uems(uri)
            annotation = mdtms(uri)
            current_file = {
                'database': 'Ester',
                'uri': uri,
                'annotated': annotated,
                'annotation': annotation,
            }
            yield current_file
class SwitchBoardSpeakerRecognitionProtocol(SpeakerDiarizationProtocol):
    """Protocol for the SwitchBoard database, backed by MDTM annotation files

    Parameters
    ----------
    preprocessors : dict, optional
        Forwarded as-is to `SpeakerDiarizationProtocol`. When omitted, a new
        empty dict is created for each instance.
    **kwargs
        Forwarded as-is to `SpeakerDiarizationProtocol`.
    """

    def __init__(self, preprocessors=None, **kwargs):
        # A `{}` default would be created once and shared by every instance;
        # build a fresh dict per call instead.
        if preprocessors is None:
            preprocessors = {}
        super(SwitchBoardSpeakerRecognitionProtocol, self).__init__(
            preprocessors=preprocessors, **kwargs)
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):
        """Yield one dictionary per file of the given protocol subset

        Reads `data/switchboard-{protocol}.{subset}.mdtm` from the `data`
        directory located next to this module.

        Parameters
        ----------
        protocol : str
            Protocol part of the MDTM file name.
        subset : str
            Subset part of the MDTM file name.

        Yields
        ------
        current_file : dict
            Dictionary with keys 'database', 'uri' and 'annotation', in
            sorted order of the URIs found in the MDTM file.
        """
        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotations
        path = op.join(
            data_dir,
            'switchboard-{protocol}.{subset}.mdtm'.format(
                subset=subset, protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        for uri in sorted(mdtms.uris):
            annotation = mdtms(uri)
            current_file = {
                'database': 'SwitchBoard',
                'uri': uri,
                'annotation': annotation,
            }
            yield current_file
class LibriSpeechSpeakerRecognitionProtocol(SpeakerDiarizationProtocol):
    """Protocol for the LibriSpeech database, backed by MDTM annotation files

    Parameters
    ----------
    preprocessors : dict, optional
        Forwarded as-is to `SpeakerDiarizationProtocol`. When omitted, a new
        empty dict is created for each instance.
    **kwargs
        Forwarded as-is to `SpeakerDiarizationProtocol`.
    """

    def __init__(self, preprocessors=None, **kwargs):
        # A `{}` default would be created once and shared by every instance;
        # build a fresh dict per call instead.
        if preprocessors is None:
            preprocessors = {}
        super(LibriSpeechSpeakerRecognitionProtocol, self).__init__(
            preprocessors=preprocessors, **kwargs)
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):
        """Yield one dictionary per file of the given protocol subset

        Reads `data/librispeech-{protocol}.{subset}.mdtm` from the `data`
        directory located next to this module.

        Parameters
        ----------
        protocol : str
            Protocol part of the MDTM file name.
        subset : str
            Subset part of the MDTM file name.

        Yields
        ------
        current_file : dict
            Dictionary with keys 'database', 'uri', 'annotation' and
            'annotated', in sorted order of the URIs found in the MDTM file.
        """
        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotations
        path = op.join(
            data_dir,
            'librispeech-{protocol}.{subset}.mdtm'.format(
                subset=subset, protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        for uri in sorted(mdtms.uris):
            annotation = mdtms(uri)
            current_file = {
                'database': 'LibriSpeech',
                'uri': uri,
                'annotation': annotation,
                # annotated part as pyannote.core.Timeline instance: a single
                # segment spanning the extent of the annotation timeline
                'annotated': Timeline(
                    uri=uri,
                    segments=[annotation.get_timeline().extent()]),
            }
            yield current_file
def test_load(sample):
    """Check the "speech" tracks that MDTMParser reads for uri "uri1"

    `sample` is handed straight to `MDTMParser.read` (presumably a pytest
    fixture providing an MDTM sample -- confirm against the test module).
    """
    loaded = MDTMParser().read(sample)
    speech_tracks = loaded(uri="uri1", modality="speech")

    observed = list(speech_tracks.itertracks(label=True))

    # (segment, track, label) triples expected from the sample
    expected = [
        (Segment(1, 3.5), 0, 'alice'),
        (Segment(3, 7.5), 1, 'barbara'),
        (Segment(6, 9), 2, 'chris'),
    ]
    assert observed == expected
class EtapeSpeakerDiarizationProtocol(SpeakerDiarizationProtocol):
    """Base speaker diarization protocol for ETAPE database

    This class should be inherited from, not used directly.

    Parameters
    ----------
    preprocessors : dict or (key, preprocessor) iterable, optional
        When provided, each protocol item (dictionary) is preprocessed, such
        that item[key] = preprocessor(**item). In case 'preprocessor' is not
        callable, it should be a string containing placeholder for item keys
        (e.g. {'wav': '/path/to/{uri}.wav'}). When omitted, a new empty dict
        is created for each instance.
    **kwargs
        Forwarded as-is to `SpeakerDiarizationProtocol`.
    """

    def __init__(self, preprocessors=None, **kwargs):
        # A `{}` default would be created once and shared by every instance;
        # build a fresh dict per call instead.
        if preprocessors is None:
            preprocessors = {}
        super(EtapeSpeakerDiarizationProtocol, self).__init__(
            preprocessors=preprocessors, **kwargs)
        self.uem_parser_ = UEMParser()
        self.mdtm_parser_ = MDTMParser()

    def _subset(self, protocol, subset):
        """Yield one dictionary per file of the given protocol subset

        Reads `data/{protocol}.{subset}.uem` and `data/{protocol}.{subset}.mdtm`
        from the `data` directory located next to this module.

        Parameters
        ----------
        protocol : str
            First component of the data file names.
        subset : str
            Second component of the data file names.

        Yields
        ------
        current_file : dict
            Dictionary with keys 'database', 'uri', 'annotated' and
            'annotation', in sorted order of the URIs found in the UEM file.
        """
        data_dir = op.join(op.dirname(op.realpath(__file__)), 'data')

        # load annotated parts
        # e.g. /data/{tv|radio|all}.{train|dev|test}.uem
        path = op.join(
            data_dir,
            '{protocol}.{subset}.uem'.format(subset=subset, protocol=protocol))
        uems = self.uem_parser_.read(path)

        # load annotations
        path = op.join(
            data_dir,
            '{protocol}.{subset}.mdtm'.format(subset=subset, protocol=protocol))
        mdtms = self.mdtm_parser_.read(path)

        # the UEM file (not the MDTM file) decides which files are yielded
        for uri in sorted(uems.uris):
            annotated = uems(uri)
            annotation = mdtms(uri)
            current_file = {
                'database': 'Etape',
                'uri': uri,
                'annotated': annotated,
                'annotation': annotation,
            }
            yield current_file
# Score every trial of the requested subset and dump the results as JSON.
# NOTE(review): this fragment starts and ends mid-script; the enclosing
# scope and original indentation are not visible here.
llss = []
# look up the trial generator named '<subset>_trial' on the protocol, call it
trials = getattr(protocol, '{subset}_trial'.format(subset=subset))()
for current_trial in trials:
    # 'reference' is removed from the trial before scoring; the popped value
    # is not used again in this fragment
    reference = current_trial.pop('reference')
    hypothesis = speaker_spotting_try_diarization(current_trial)
    llss.append(process_trial(current_trial, hypothesis))

import simplejson as json
with open(output_file, 'w') as outfile:
    json.dump(llss, outfile)

if arguments['automatic']:
    from pyannote.parser import MDTMParser
    diarization_mdtm = arguments['<diarization.mdtm>']
    parser = MDTMParser()
    annotations = parser.read(diarization_mdtm)

    # Rebuild REFERENCE from the MDTM file: every MDTM uri is mapped to
    # '<text before the first underscore>.Mix-Headset', so several MDTM uris
    # may feed the same Annotation.
    REFERENCE = {}
    for uri_part in annotations.uris:
        uri = uri_part.split('_')[0] + '.Mix-Headset'
        if uri not in REFERENCE:
            REFERENCE[uri] = Annotation(uri=uri)
        REFERENCE[uri].update(annotations(uri=uri_part, modality="speaker"))

    # same trial loop as above, run again after REFERENCE has been replaced
    llss = []
    trials = getattr(protocol, '{subset}_trial'.format(subset=subset))()
    for current_trial in trials:
        reference = current_trial.pop('reference')
        hypothesis = speaker_spotting_try_diarization(current_trial)
from pyannote.database import get_protocol, FileFinder
protocol = get_protocol('AMI.SpeakerSpotting.MixHeadset', progress=True)

# REFERENCE: one Annotation per uri, filled from the 'annotation' entry of
# every file of the development subset (a uri seen several times is updated
# in place rather than overwritten).
from pyannote.core import Annotation, Segment, Timeline
REFERENCE = {}
for current_file in protocol.development():
    uri = current_file['uri']
    if uri not in REFERENCE:
        REFERENCE[uri] = Annotation(uri=uri)
    REFERENCE[uri].update(current_file['annotation'])

# SAD: per-uri timeline derived from the "speaker" modality of an MDTM file
# read from a hard-coded absolute path.
# NOTE(review): the name suggests speech activity detection output -- confirm.
from pyannote.parser import MDTMParser
sad_dev = '/people/yin/projects/online_clustering/spotting/AMI.SpeakerSpotting.MixHeadset.development.mdtm'
parser_dev = MDTMParser()
annotations_dev = parser_dev.read(sad_dev)
SAD = {}
for item in protocol.development():
    uri = item['uri']
    SAD[uri] = annotations_dev(uri=uri, modality="speaker").get_timeline().support()


# Exception type declared here; nothing in this fragment raises it.
class PyannoteFeatureExtractionError(Exception):
    pass


class Precomputed(object):
    """Load precomputed features from HDF5 file

    Parameters
    ----------
# enrolment consists in summing all relevant embeddings def speaker_spotting_enrol(current_enrolment): enrol_with = current_enrolment['enrol_with'] embeddings = precomputed(current_enrolment) return np.sum(embeddings.crop(enrol_with), axis=0, keepdims=True) models = {} for current_enrolment in protocol.development_enrolment(): model_id = current_enrolment.pop('model_id') models[model_id] = speaker_spotting_enrol(current_enrolment) from pyannote.parser import MDTMParser cluster_mdtm = '/people/yin/projects/online_clustering/spotting/EURECOM-online-diarization-pyannote-VAD.dev.WithOffset.mdtm' parser_dev = MDTMParser() annotations_dev = parser_dev.read(cluster_mdtm) REFERENCE = {} for uri_part in annotations_dev.uris: uri = uri_part.split('_')[0] + '.Mix-Headset' if uri not in REFERENCE: REFERENCE[uri] = Annotation(uri=uri) REFERENCE[uri].update(annotations_dev(uri=uri_part, modality="speaker")) # Trials from pyannote.core import SlidingWindow, SlidingWindowFeature from pyannote.audio.embedding.utils import cdist # trial consists in comparing each embedding to the target embedding
from pyannote.database import get_protocol, FileFinder
protocol = get_protocol('AMI.SpeakerSpotting.MixHeadset', progress=True)

# REFERENCE: one Annotation per uri, filled from the 'annotation' entry of
# every file of the test subset (a uri seen several times is updated in
# place rather than overwritten).
from pyannote.core import Annotation,Segment, Timeline
REFERENCE = {}
for current_file in protocol.test():
    uri = current_file['uri']
    if uri not in REFERENCE:
        REFERENCE[uri] = Annotation(uri=uri)
    REFERENCE[uri].update(current_file['annotation'])

# SAD: per-uri timeline derived from the "speaker" modality of an MDTM file
# read from a hard-coded absolute path.
# NOTE(review): the name suggests speech activity detection output -- confirm.
from pyannote.parser import MDTMParser
sad_tst = '/people/yin/projects/online_clustering/spotting_test/AMI.SpeakerSpotting.MixHeadset.test.mdtm'
parser_tst = MDTMParser()
annotations_tst = parser_tst.read(sad_tst)
SAD = {}
for item in protocol.test():
    uri = item['uri']
    SAD[uri] = annotations_tst(uri=uri, modality="speaker").get_timeline().support()


# Exception type declared here; nothing in this fragment raises it.
class PyannoteFeatureExtractionError(Exception):
    pass


# NOTE(review): only the docstring of this class is visible in this fragment.
class Precomputed(object):
    """Load precomputed features from HDF5 file

    Parameters
    ----------
    features_h5 : str
        Path to HDF5 file generated by script 'feature_extraction.py'.
    """
def speaker_spotting_enrol(current_enrolment):
    """Build a speaker model by summing the enrolment embeddings

    Parameters
    ----------
    current_enrolment : dict
        Must contain 'enrol_with'; the whole dictionary is also passed to
        `precomputed` (defined outside this fragment) to get the embeddings.

    Returns
    -------
    Sum over axis 0 of the embeddings cropped to `enrol_with`, with the
    summed axis kept (keepdims=True).
    """
    enrol_with = current_enrolment['enrol_with']
    embeddings = precomputed(current_enrolment)
    return np.sum(embeddings.crop(enrol_with), axis=0, keepdims=True)

# one model per enrolment of the test subset, keyed by 'model_id'
# ('model_id' is popped, so it is absent from the dict passed to enrolment)
models = {}
for current_enrolment in protocol.test_enrolment():
    model_id = current_enrolment.pop('model_id')
    models[model_id] = speaker_spotting_enrol(current_enrolment)

from pyannote.parser import MDTMParser
# relative path: resolved against the current working directory
cluster_mdtm = 'OD_AVAD_tst.mdtm'
parser_tst = MDTMParser()
annotations_tst = parser_tst.read(cluster_mdtm)

# REFERENCE is built from the MDTM file: every MDTM uri is mapped to
# '<text before the first underscore>.Mix-Headset', so several MDTM uris may
# feed the same Annotation.
REFERENCE = {}
for uri_part in annotations_tst.uris:
    uri = uri_part.split('_')[0] + '.Mix-Headset'
    if uri not in REFERENCE:
        REFERENCE[uri] = Annotation(uri=uri)
    REFERENCE[uri].update(annotations_tst(uri=uri_part, modality="speaker"))

# Trials
from pyannote.core import SlidingWindow, SlidingWindowFeature
from pyannote.audio.embedding.utils import cdist

# trial consists in comparing each embedding to the target embedding
# Score every trial of the requested subset and dump the results as JSON.
# NOTE(review): this fragment starts and ends mid-script; the enclosing
# scope and original indentation are not visible here.
llss = []
# look up the trial generator named '<subset>_trial' on the protocol, call it
trials = getattr(protocol, '{subset}_trial'.format(subset=subset))()
for current_trial in trials:
    # 'reference' is removed from the trial before scoring; the popped value
    # is not used again in this fragment
    reference = current_trial.pop('reference')
    hypothesis = speaker_spotting_try_diarization(current_trial)
    llss.append(process_trial(current_trial, hypothesis))

import simplejson as json
with open(output_file, 'w') as outfile:
    json.dump(llss, outfile)

if arguments['automatic']:
    from pyannote.parser import MDTMParser
    diarization_mdtm = arguments['<diarization.mdtm>']
    parser = MDTMParser()
    annotations = parser.read(diarization_mdtm)

    # Rebuild REFERENCE from the MDTM file: every MDTM uri is mapped to
    # '<text before the first underscore>.Mix-Headset', so several MDTM uris
    # may feed the same Annotation.
    REFERENCE = {}
    for uri_part in annotations.uris:
        uri = uri_part.split('_')[0] + '.Mix-Headset'
        if uri not in REFERENCE:
            REFERENCE[uri] = Annotation(uri=uri)
        REFERENCE[uri].update(annotations(uri=uri_part, modality="speaker"))

    # same trial loop as above, run again after REFERENCE has been replaced
    llss = []
    trials = getattr(protocol, '{subset}_trial'.format(subset=subset))()
    for current_trial in trials:
        reference = current_trial.pop('reference')
        hypothesis = speaker_spotting_try_diarization(current_trial)
        llss.append(process_trial(current_trial, hypothesis))