Example #1
def get_stat_server(ubm, idmap, feature_server, stat_path):

    if os.path.exists(stat_path):
        print("stat server exits")
        stats = sidekit.StatServer(stat_path, distrib_nb=n_components, feature_size=n_feats)
    else:
        stats = sidekit.StatServer(idmap, distrib_nb=n_components, feature_size=n_feats)
        stats.accumulate_stat(ubm=ubm, feature_server=feature_server, seg_indices=range(stats.segset.shape[0]),num_thread=nj)
        stats.write(stat_path)
    return stats
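
A minimal usage sketch, not part of the original snippet: it assumes a trained UBM, an enrollment IdMap and an existing sidekit.FeaturesServer, and that n_components, n_feats and nj are module-level globals, as the function body implies.

ubm = sidekit.Mixture()
ubm.read('model/ubm.h5')                       # assumed path to a trained UBM
enroll_idmap = sidekit.IdMap('task/idmap.h5')  # assumed enrollment IdMap
stats = get_stat_server(ubm,
                        enroll_idmap,
                        feature_server,        # an existing sidekit.FeaturesServer
                        stat_path='task/enroll_stat.h5')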
Example #2
def adaptation(args):
    if args.feat_type == 'mfcc':
        datasetlist = ["energy", "cep", "vad"]
        mask = "[0-12]"
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'
    if args.feat_type == 'fb':
        datasetlist = ["fb", "vad"]
        mask = None
        features_folder = '/home/zeng/zeng/aishell/af2019-sr-devset-20190312/feature'

    # create feature server for loading feature from disk
    feature_server = sidekit.FeaturesServer(
        features_extractor=None,
        feature_filename_structure=features_folder + "/{}.h5",
        sources=None,
        dataset_list=datasetlist,
        mask=mask,
        feat_norm="cmvn",
        global_cmvn=None,
        dct_pca=False,
        dct_pca_config=None,
        sdc=False,
        sdc_config=None,
        delta=True if args.delta else False,
        double_delta=True if args.delta else False,
        delta_filter=None,
        context=None,
        traps_dct_nb=None,
        rasta=True,
        keep_all_features=False)

    enroll_idmap = sidekit.IdMap(os.getcwd() + '/task/idmap.h5')
    ndx = sidekit.Ndx(os.getcwd() + '/task/dev_ndx.h5')

    ubm = sidekit.Mixture()
    ubm.read(os.getcwd() + '/model/ubm.h5')
    enroll_stat = sidekit.StatServer(enroll_idmap,
                                     distrib_nb=ubm.distrib_nb(),
                                     feature_size=ubm.dim())
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=feature_server,
                                seg_indices=range(enroll_stat.segset.shape[0]),
                                num_thread=args.num_thread)
    enroll_stat.write(os.getcwd() + '/task/enroll_stat.h5')

    print('MAP adaptation', end='')
    regulation_factor = 16
    enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
    enroll_sv.write(os.getcwd() + '/task/enroll_sv.h5')
    print('\rMAP adaptation done')

    print('Compute scores', end='')
    score = sidekit.gmm_scoring(ubm,
                                enroll_sv,
                                ndx,
                                feature_server,
                                num_thread=args.num_thread)
    score.write(os.getcwd() + '/task/dev_score.h5')
    print('\rCompute scores done')
Example #3
    def train(self, SAVE_FLAG=True):
        #SEE: https://projets-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html
        train_list = os.listdir(os.path.join(self.BASE_DIR, "audio", "enroll"))
        for i in range(len(train_list)):
            train_list[i] = train_list[i].split(".h5")[0]
        server = self.createFeatureServer("enroll")
        logging.info("Training...")
        ubm = sidekit.Mixture()
        # Expectation-Maximization estimation of the Mixture parameters.
        ubm.EM_split(
            features_server=server,  # sidekit.FeaturesServer used to load data
            feature_list=train_list,  # list of feature files to train the model
            distrib_nb=self.NUM_GUASSIANS,  # final number of Gaussian distributions
            iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8),  # number of EM iterations for each step of the learning process
            num_thread=self.NUM_THREADS,  # number of threads to launch for parallel computing
            save_partial=False  # if False, only the last model is saved
        )
        # -> 2 iterations of EM with 2   distributions
        # -> 2 iterations of EM with 4   distributions
        # -> 4 iterations of EM with 8   distributions
        # -> 4 iterations of EM with 16  distributions
        # -> 4 iterations of EM with 32  distributions
        # -> 4 iterations of EM with 64  distributions
        # -> 8 iterations of EM with 128 distributions
        # -> 8 iterations of EM with 256 distributions
        # -> 8 iterations of EM with 512 distributions
        # -> 8 iterations of EM with 1024 distributions
        model_dir = os.path.join(self.BASE_DIR, "ubm")
        modelname = "ubm_{}.h5".format(self.NUM_GUASSIANS)
        logging.info("Saving the model {} at {}".format(modelname, model_dir))
        ubm.write(os.path.join(model_dir, modelname))

        # Read idmap for the enrolling data
        enroll_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "enroll_idmap.h5"))
        # Create Statistic Server to store/process the enrollment data
        enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap,
                                         ubm=ubm)
        logging.debug(enroll_stat)

        # Compute the sufficient statistics for a list of sessions whose indices are segIndices.
        server.feature_filename_structure = os.path.join(
            self.BASE_DIR, "feat", "{}.h5")
        # BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
        enroll_stat.accumulate_stat(ubm=ubm,
                                    feature_server=server,
                                    seg_indices=range(
                                        enroll_stat.segset.shape[0]))
        if SAVE_FLAG:
            # Save the status of the enroll data
            filename = "enroll_stat_{}.h5".format(self.NUM_GUASSIANS)
            enroll_stat.write(os.path.join(self.BASE_DIR, "ubm", filename))
Example #4
    def data_init(self):
        # Read tv_idmap, and plda_idmap
        tv_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "idmap_tv.h5"))
        plda_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "idmap_plda.h5"))
        # Load UBM
        ubm = sidekit.Mixture()
        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
        # Create Feature Server
        fs = self.__createFeatureServer()

        # Create a joint StatServer for TV and PLDA training data
        back_idmap = plda_idmap.merge(tv_idmap)
        if not back_idmap.validate():
            logging.warning("Error merging tv_idmap & plda_idmap")
            return
        back_stat = sidekit.StatServer(statserver_file_name=back_idmap,
                                       ubm=ubm)
        # Jointly compute the sufficient statistics of TV and PLDA data
        # BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
        back_stat.accumulate_stat(ubm=ubm,
                                  feature_server=fs,
                                  seg_indices=range(back_stat.segset.shape[0]))
        back_stat.write(os.path.join(self.BASE_DIR, "task", 'stat_back.h5'))
        # Load the sufficient statistics from TV training data
        tv_stat = sidekit.StatServer.read_subset(
            os.path.join(self.BASE_DIR, "task", 'stat_back.h5'), tv_idmap)
        tv_stat.write(os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'))
        # Train TV matrix using FactorAnalyser
        filename = "tv_matrix_{}".format(self.NUM_GUASSIANS)
        outputPath = os.path.join(self.BASE_DIR, "ivector", filename)
        fa = sidekit.FactorAnalyser()
        fa.total_variability_single(os.path.join(self.BASE_DIR, "task",
                                                 'tv_stat.h5'),
                                    ubm,
                                    tv_rank=self.RANK_TV,
                                    nb_iter=self.TV_ITERATIONS,
                                    min_div=True,
                                    tv_init=None,
                                    batch_size=self.BATCH_SIZE,
                                    save_init=False,
                                    output_file_name=outputPath)
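
        # Hedged follow-up, not in the source: after total_variability_single
        # finishes, this FactorAnalyser instance holds the trained TV matrix, so
        # i-vectors for the TV statistics could be extracted directly; the
        # extract_ivectors call below is an assumption about sidekit's API.
        tv_ivectors = fa.extract_ivectors(
            ubm, os.path.join(self.BASE_DIR, "task", 'tv_stat.h5'))
        tv_ivectors.write(os.path.join(self.BASE_DIR, "ivector", 'tv_ivectors.h5'))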
Example #5
    def train_total_variability(self,
                                ubm,
                                fs,
                                distrib_nb,
                                rank_TV,
                                tv_iteration,
                                train_idmap,
                                num_threads=10):

        self.logger.info('train total variability ')

        train_stat = sidekit.StatServer(train_idmap,
                                        ubm=ubm,
                                        distrib_nb=distrib_nb,
                                        feature_size=0,
                                        index=None)
        train_stat.accumulate_stat(ubm=ubm,
                                   feature_server=fs,
                                   seg_indices=range(
                                       train_stat.segset.shape[0]),
                                   num_thread=num_threads)

        tv_mean, tv, _, __, tv_sigma = train_stat.factor_analysis(
            rank_f=rank_TV,
            rank_g=0,
            rank_h=None,
            re_estimate_residual=False,
            it_nb=(tv_iteration, 0, 0),
            min_div=True,
            ubm=ubm,
            batch_size=100,
            num_thread=num_threads)

        sidekit.sidekit_io.write_tv_hdf5(
            (tv, tv_mean, tv_sigma),
            get_experiment_nets() + "/TV_{}".format(self.network_file))
Example #6
    def get_embeddings(self):
        '''
        finally, testing:
        '''
        speaker_list = self.get_validation_data_name()
        distrib_nb = self.config.getint('i_vector', 'distrib_nb')
        nbThread = self.config.getint('i_vector', 'nbThread')
        vector_size = self.config.getint('i_vector', 'vector_size')
        feature_extension = 'h5'

        set_of_embeddings = []
        set_of_speakers = []
        set_of_num_embeddings = []
        set_of_times = []
        checkpoints = ["/TV_{}".format(self.network_file)]

        #load data:
        ubm = sidekit.Mixture()
        ubm.read(get_experiment_nets() +
                 '/ubm_{}.h5'.format(self.network_file))
        ubm_list, test_list_long = self.load_data(
            speaker_list,
            os.path.splitext(
                os.path.split(self.get_validation_train_data())[1])[0])
        ubm_list, test_list_short = self.load_data(
            speaker_list,
            os.path.splitext(
                os.path.split(self.get_validation_test_data())[1])[0])
        tv, tv_mean, tv_sigma = sidekit.sidekit_io.read_tv_hdf5(
            get_experiment_nets() + "/TV_{}".format(self.network_file))

        fs = sidekit.FeaturesServer(feature_filename_structure=(
            "{dir}/{speaker_list}/feat/{{}}.{ext}".format(
                dir=get_training('i_vector'),
                speaker_list=speaker_list,
                ext=feature_extension)),
                                    dataset_list=["energy", "cep", "vad"],
                                    mask="[0-12]",
                                    feat_norm="cmvn",
                                    keep_all_features=True,
                                    delta=True,
                                    double_delta=True,
                                    rasta=True,
                                    context=None)

        # extract i-vectors
        test_stat_long = sidekit.StatServer(test_list_long,
                                            ubm=ubm,
                                            distrib_nb=distrib_nb,
                                            feature_size=0,
                                            index=None)
        test_stat_long.accumulate_stat(ubm=ubm,
                                       feature_server=fs,
                                       seg_indices=range(
                                           test_stat_long.segset.shape[0]),
                                       num_thread=nbThread)

        test_stat_short = sidekit.StatServer(test_list_short,
                                             ubm=ubm,
                                             distrib_nb=distrib_nb,
                                             feature_size=0,
                                             index=None)
        test_stat_short.accumulate_stat(ubm=ubm,
                                        feature_server=fs,
                                        seg_indices=range(
                                            test_stat_short.segset.shape[0]),
                                        num_thread=nbThread)

        test_iv_long = test_stat_long.estimate_hidden(tv_mean,
                                                      tv_sigma,
                                                      V=tv,
                                                      batch_size=100,
                                                      num_thread=nbThread)[0]
        test_iv_short = test_stat_short.estimate_hidden(tv_mean,
                                                        tv_sigma,
                                                        V=tv,
                                                        batch_size=100,
                                                        num_thread=nbThread)[0]

        iv_lis, y_list, s_list = create_data_lists(
            False, test_iv_long.stat1, test_iv_short.stat1,
            test_list_long.leftids.astype(int),
            test_list_short.leftids.astype(int))

        #generate embeddings
        embeddings, speakers, num_embeddings = generate_embeddings(
            iv_lis, y_list, vector_size)

        set_of_embeddings.append(embeddings)
        set_of_speakers.append(speakers)
        set_of_num_embeddings.append(num_embeddings)
        set_of_times = [
            np.zeros(
                (len(test_list_long.leftids) + len(test_list_short.leftids), ),
                dtype=int)
        ]

        return checkpoints, set_of_embeddings, set_of_speakers, set_of_num_embeddings, set_of_times
Example #7
for line in eval_lines:
    splits = line.strip().split(' ')    
    uttId = splits[0]    
    spkId = uttId.split('_')[0]
    models.append(spkId)
    segments.append(uttId)

enroll_idmap.leftids = numpy.asarray(models)
enroll_idmap.rightids = numpy.asarray(segments)
enroll_idmap.start = numpy.empty(enroll_idmap.rightids.shape, '|O')
enroll_idmap.stop = numpy.empty(enroll_idmap.rightids.shape, '|O')
enroll_idmap.validate()

print('Compute the sufficient statistics')
 # Create a StatServer for the enrollment data and compute the statistics
enroll_stat = sidekit.StatServer(enroll_idmap, components_num, n_feats)
enroll_stat.accumulate_stat(ubm=ubm, feature_server=server_enroll, seg_indices=range(enroll_stat.segset.shape[0]), num_thread=nj)

print('MAP adaptation of the speaker models')
regulation_factor = 16  # MAP regulation factor default=16
enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
enroll_sv.write('/home/wcq/bird/task/enroll_map_models.h5')

enroll_sv = sidekit.StatServer('/home/wcq/bird/task/enroll_map_models.h5', components_num, n_feats)

print('get test feats')
testList, test_input_file_list, test_output_feats_list = basic_ops.get_info4mfcc(test_wavscp_path, project_dir, 'test')
basic_ops.make_mfcc_feats(testList, test_input_file_list, test_output_feats_list, nj)
server_test = basic_ops.get_feature_server(test_feature_filename_structure)

ubm_w = ubm.w
Example #8
    def __create_stats(self):
        """
        This private method is used to create Statistic Servers.
        TODO: post some more info
        """
        # Read tv_idmap
        tv_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "tv_idmap.h5"))
        back_idmap = tv_idmap
        # If PLDA is enabled
        if self.ENABLE_PLDA:
            # Read plda_idmap
            plda_idmap = sidekit.IdMap.read(
                os.path.join(self.BASE_DIR, "task", "plda_idmap.h5"))
            # Create a joint StatServer for TV and PLDA training data
            back_idmap = plda_idmap.merge(tv_idmap)
            if not back_idmap.validate():
                raise RuntimeError("Error merging tv_idmap & plda_idmap")

        # Check UBM model
        ubm_name = "ubm_{}.h5".format(self.NUM_GAUSSIANS)
        ubm_path = os.path.join(self.BASE_DIR, "ubm", ubm_name)
        if not os.path.exists(ubm_path):
            #if UBM model does not exist, train one
            logging.info("Training UBM-{} model".format(self.NUM_GAUSSIANS))
            ubm = UBM(self.conf_path)
            ubm.train()
        #load trained UBM model
        logging.info("Loading trained UBM-{} model".format(self.NUM_GAUSSIANS))
        ubm = sidekit.Mixture()
        ubm.read(ubm_path)
        back_stat = sidekit.StatServer(statserver_file_name=back_idmap,
                                       ubm=ubm)
        # Create Feature Server
        fs = self.createFeatureServer()

        # Jointly compute the sufficient statistics of TV and (if enabled) PLDA data
        back_filename = 'back_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(
                os.path.join(self.BASE_DIR, "stat", back_filename)):
            # BUG: don't use self.NUM_THREADS when assigning num_thread
            # as it's prone to race conditions
            back_stat.accumulate_stat(ubm=ubm,
                                      feature_server=fs,
                                      seg_indices=range(
                                          back_stat.segset.shape[0]))
            back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))

        # Load the sufficient statistics from TV training data
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat",
                                           tv_filename)):
            tv_stat = sidekit.StatServer.read_subset(
                os.path.join(self.BASE_DIR, "stat", back_filename), tv_idmap)
            tv_stat.write(os.path.join(self.BASE_DIR, "stat", tv_filename))

        # Load sufficient statistics and extract i-vectors from PLDA training data
        if self.ENABLE_PLDA:
            plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GAUSSIANS)
            if not os.path.isfile(
                    os.path.join(self.BASE_DIR, "stat", plda_filename)):
                plda_stat = sidekit.StatServer.read_subset(
                    os.path.join(self.BASE_DIR, "stat", back_filename),
                    plda_idmap)
                plda_stat.write(
                    os.path.join(self.BASE_DIR, "stat", plda_filename))

        # Load sufficient statistics from test data
        filename = 'test_stat_{}.h5'.format(self.NUM_GAUSSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", filename)):
            test_idmap = sidekit.IdMap.read(
                os.path.join(self.BASE_DIR, "task", "test_idmap.h5"))
            test_stat = sidekit.StatServer(statserver_file_name=test_idmap,
                                           ubm=ubm)
            # Create Feature Server
            fs = self.createFeatureServer()
            # Jointly compute the sufficient statistics of TV and PLDA data
            # BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
            test_stat.accumulate_stat(ubm=ubm,
                                      feature_server=fs,
                                      seg_indices=range(
                                          test_stat.segset.shape[0]))
            test_stat.write(os.path.join(self.BASE_DIR, "stat", filename))
Example #9
print('Train the UBM by EM')
# Extract all features and train a GMM without writing to disk
ubm = sidekit.Mixture()
llk = ubm.EM_split(features_server,
                   ubmList,
                   distribNb,
                   num_thread=nbThread,
                   save_partial=True)
ubm.write('gmm/ubm.h5')

# Compute the sufficient statistics on the UBM

print('Compute the sufficient statistics')
# Create a StatServer for the enrollment data and compute the statistics
enroll_stat = sidekit.StatServer(enroll_idmap, distrib_nb=4, feature_size=50)
enroll_stat.accumulate_stat(ubm=ubm,
                            feature_server=features_server,
                            seg_indices=range(enroll_stat.segset.shape[0]),
                            num_thread=nbThread)
enroll_stat.write('data/stat_rsr2015_male_enroll.h5')

# Adapt the GMM speaker models from the UBM via a MAP adaptation

print('MAP adaptation of the speaker models')
regulation_factor = 3  # MAP regulation factor
enroll_sv = enroll_stat.adapt_mean_map_multisession(ubm, regulation_factor)
enroll_sv.write('data/sv_rsr2015_male_enroll.h5')

# Compute all trials and save scores in HDF5 format
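
The snippet ends before that scoring step; a minimal sketch of what typically follows, mirroring Example #2 — the trial-list and output file names are assumptions.

test_ndx = sidekit.Ndx('task/test_ndx.h5')     # assumed Ndx trial list
scores = sidekit.gmm_scoring(ubm,
                             enroll_sv,
                             test_ndx,
                             features_server,
                             num_thread=nbThread)
scores.write('data/scores_gmm_ubm.h5')         # assumed output path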
Example #10
def train_net(net, train_dataloader, test_dataloader):
    if not os.path.exists(ConfigNetwork.modelname):
        last_model_loaded = False  # True # False
        # inits
        iteration_number = 0
        for epoch in range(0, ConfigNetwork.train_number_epochs):
            """
            if ConfigNetwork.learning_rate_scheduler:
                optimizer = optim.Adam(net.parameters(),
                                       lr = ConfigNetwork.learning_rate)
                scheduler = ReduceLROnPlateau(optimizer, 'min')
            else:
            """
            epoch_learning_rate_exponent = max(
                0,
                epoch - (ConfigNetwork.learning_rate_defactor_after_epoch - 1))
            lr = ConfigNetwork.learning_rate * ConfigNetwork.learning_rate_defactor**epoch_learning_rate_exponent
            parameters = filter(lambda p: p.requires_grad, net.parameters())
            if ConfigNetwork.train_vae:
                optimizer = pyro_optim.Adam({'lr': lr})
            else:
                optimizer = optim.Adam(parameters, lr=lr)
            base_file_pattern = os.path.join(
                ConfigNetwork.storage_dir,
                '{}_epoch_{}'.format(ConfigNetwork.modelname, epoch))
            epoch_net_file = '{}_model'.format(base_file_pattern)
            if epoch < ConfigNetwork.freeze_ResNet_epochs:
                net.set_ResNet_requires_grad(requires_grad=False)
            else:
                net.set_ResNet_requires_grad(requires_grad=True)
            if not os.path.exists(epoch_net_file) and epoch == 0:
                # init meta embeddings network
                if ConfigNetwork.train_with_meta_embeddings:
                    logging.debug('init B with plda expectation')
                    if not os.path.exists(ConfigNetwork.embeddings_file):
                        dataset = SoftMaxDatabase(
                            imageFolderDataset=ConfigFaceDatasets.dataset_class(
                                root=ConfigFaceDatasets.training_dir),
                            transform=train_dataloader.dataset.transform,
                            should_invert=False)
                        embeddings_loader = DataLoader(
                            dataset,
                            shuffle=False,
                            num_workers=ConfigNetwork.num_workers,
                            batch_size=ConfigNetwork.batch_size_train)
                        softmax_net = net.to_softmaxNetwork()
                        softmax_net.normalize = False
                        with h5py.File(ConfigNetwork.embeddings_file,
                                       "a") as embd_file:
                            for i, data in enumerate(embeddings_loader, 0):
                                img0, label = data
                                img0, label = Variable(img0).cuda(), Variable(
                                    label).cuda()
                                output0 = super(SoftMaxNetwork,
                                                softmax_net).forward_once(img0)
                                embd_file.create_dataset(
                                    "{}".format(i),
                                    data=numpy.column_stack(
                                        (output0.data.cpu().numpy(),
                                         label.data.cpu().numpy())),
                                    compression="gzip",
                                    fletcher32=True)
                            logging.critical('extracted embeddings')
                    if not os.path.exists(
                            ConfigNetwork.embeddings_file_plda
                    ) or not os.path.exists(
                            ConfigNetwork.embeddings_mean_file):
                        data = []
                        with h5py.File(ConfigNetwork.embeddings_file,
                                       "r") as h5f:
                            for key, value in h5f.items():
                                data.append(value.value)
                        data = numpy.concatenate(data)
                        embeddings = data[:, :ConfigNetwork.embedding_size]
                        embeddings_mean = embeddings.mean(0)
                        numpy.save(ConfigNetwork.embeddings_mean_file,
                                   embeddings_mean)
                        logging.debug('embeddings mean: {}'.format(
                            embeddings.mean(0)))
                        embeddings -= embeddings.mean(0)
                        embeddings = (embeddings.T / numpy.linalg.norm(
                            embeddings, axis=1)).T  # prepare cosine distance
                        embedding_labels = data[:, ConfigNetwork.embedding_size:].squeeze()

                        s = sidekit.StatServer()
                        s.modelset = embedding_labels
                        s.segset = numpy.arange(
                            embedding_labels.shape[0]).astype(str)
                        s.stat0 = numpy.ones((embedding_labels.shape[0], 1))
                        s.stat1 = copy.deepcopy(embeddings)
                        s.start = numpy.empty(embedding_labels.shape[0],
                                              dtype='|O')
                        s.stop = numpy.empty(embedding_labels.shape[0],
                                             dtype='|O')
                        s.validate()
                        ids = numpy.unique(s.modelset)
                        class_nb = ids.shape[0]

                        f = sidekit.FactorAnalyser()
                        rank_f = ConfigNetwork.embedding_size
                        f.plda(s, rank_f=rank_f)
                        f.write(ConfigNetwork.embeddings_file_plda)
                    else:
                        f = sidekit.FactorAnalyser(
                            ConfigNetwork.embeddings_file_plda)

                    e_mu = torch.from_numpy(f.mean).type(torch.FloatTensor)
                    e_B = torch.from_numpy(
                        numpy.linalg.inv(f.Sigma).diagonal()).type(
                            torch.FloatTensor)
                    # e_B = torch.from_numpy(numpy.linalg.inv(f.Sigma)).type(torch.FloatTensor)
                    assert (isinstance(net, GME_SoftmaxNetwork))
                    net = GME_SoftmaxNetwork(
                        num_train_classes=net.num_train_classes,
                        pretrained_siamese_net=net.pretrained_net,
                        expected_mu=e_mu,
                        expected_B=e_B).cuda()
                    logging.debug('init B with plda done')

            if not os.path.exists(epoch_net_file):
                if last_model_loaded:
                    logging.critical(
                        'run validation on epoch {}'.format(epoch - 1))
                    test_model(database_dir=test_dataloader,
                               net=net,
                               net_distance=net_distance,
                               epoch=None)
                    last_model_loaded = False

                if ConfigNetwork.select_difficult_pairs_epoch is not None:
                    if epoch == ConfigNetwork.select_difficult_pairs_epoch:
                        train_dataloader = select_difficult_pairs(
                            net, train_dataloader)

                # train an epoch
                net.train()
                train_epoch(train_dataloader=train_dataloader,
                            net=net,
                            optimizer=optimizer,
                            epoch=epoch,
                            iteration_number=iteration_number)
                torch.save(obj=net.state_dict(), f=epoch_net_file)
            else:
                net.load_state_dict(torch.load(epoch_net_file))
                logging.info('loaded model for epoch: {}'.format(epoch))
                last_model_loaded = True
                continue

            logging.critical('run validation on epoch {}'.format(epoch))
            test_model(database_dir=test_dataloader,
                       net=net,
                       net_distance=net_distance,
                       epoch=None)
        torch.save(obj=net.state_dict(),
                   f='{}'.format(ConfigNetwork.modelname))
        logging.info('training completed, model stored.')
Example #11
File: 4.py  Project: khac/DCASE-Mandi
import os
os.environ['THEANO_FLAGS'] = 'device=cpu'
os.environ['SIDEKIT'] = 'libsvm=false,theano=false'
import sys
import sidekit
import h5py
import logging
import numpy as np

directory = os.fsencode(
    "/home/adit/Desktop/DCASE2017-baseline-system-master/Model_DCASE"
)  #"/home/adit/Desktop")
distribNb = 2048
ubm = sidekit.Mixture()
enroll_stat = sidekit.StatServer(distrib_nb=distribNb, feature_size=40)
regulation_factor = 3  # MAP regulation factor
enroll_sv = enroll_stat.adapt_mean_map(ubm, regulation_factor)
enroll_sv.write('gmm_adapted.h5')
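
The snippet above adapts from a Mixture whose parameters were never loaded and a StatServer with no accumulated statistics, so adapt_mean_map has nothing to work with. A hedged sketch of the steps the other examples on this page perform first — file names and the FeaturesServer are assumptions, not taken from the source:

ubm.read('ubm_2048.h5')                                 # assumed path to a trained UBM
enroll_idmap = sidekit.IdMap.read('enroll_idmap.h5')    # assumed enrollment IdMap
enroll_stat = sidekit.StatServer(enroll_idmap, distrib_nb=distribNb, feature_size=40)
enroll_stat.accumulate_stat(ubm=ubm,
                            feature_server=feature_server,  # assumed FeaturesServer
                            seg_indices=range(enroll_stat.segset.shape[0]))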
Example #12
    def train(self, SAVE=True):
        """
        This method trains our UBM model by doing the following:
        - create a FeaturesServer for the enroll features
        - use the EM algorithm to train our UBM over the enroll features
        - create a StatServer to accumulate the enrollment statistics
        - if the SAVE argument is True (the default), save that StatServer.
        Args:
            SAVE (boolean): if True, the StatServer is saved. If False,
               it is discarded.
        """
        #SEE: https://projets-lium.univ-lemans.fr/sidekit/tutorial/ubmTraining.html
        train_list = os.listdir(os.path.join(self.BASE_DIR, "audio", "enroll"))
        for i in range(len(train_list)):
            train_list[i] = train_list[i].split(".h5")[0]
        server = self.createFeatureServer("enroll")
        logging.info("Training...")
        ubm = sidekit.Mixture()
        # Set the model name
        ubm.name = "ubm_{}.h5".format(self.NUM_GAUSSIANS) 
        # Expectation-Maximization estimation of the Mixture parameters.
        ubm.EM_split(
            features_server=server, #sidekit.FeaturesServer used to load data
            feature_list=train_list, #list of feature files to train the model
            distrib_nb=self.NUM_GAUSSIANS, #number of Gaussian distributions
            num_thread=self.NUM_THREADS, # number of parallel processes
            save_partial=False, # if False, it only saves the last model
            iterations=(1, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8)
            )
            # -> 2 iterations of EM with 2    distributions
            # -> 2 iterations of EM with 4    distributions
            # -> 4 iterations of EM with 8    distributions
            # -> 4 iterations of EM with 16   distributions
            # -> 4 iterations of EM with 32   distributions
            # -> 4 iterations of EM with 64   distributions
            # -> 8 iterations of EM with 128  distributions
            # -> 8 iterations of EM with 256  distributions
            # -> 8 iterations of EM with 512  distributions
            # -> 8 iterations of EM with 1024 distributions
        model_dir = os.path.join(self.BASE_DIR, "ubm")
        logging.info("Saving the model {} at {}".format(ubm.name, model_dir))
        ubm.write(os.path.join(model_dir, ubm.name))

        # Read idmap for the enrolling data
        enroll_idmap = sidekit.IdMap.read(os.path.join(self.BASE_DIR, "task", "enroll_idmap.h5"))
        # Create Statistic Server to store/process the enrollment data
        enroll_stat = sidekit.StatServer(statserver_file_name=enroll_idmap,
                                         ubm=ubm)
        logging.debug(enroll_stat)

        server.feature_filename_structure = os.path.join(self.BASE_DIR, "feat", "{}.h5")
        # Compute the sufficient statistics for a list of sessions whose indices are segIndices.
        # BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
        enroll_stat.accumulate_stat(ubm=ubm,
                                    feature_server=server,
                                    seg_indices=range(enroll_stat.segset.shape[0])
                                   )
        if SAVE:
            # Save the status of the enroll data
            filename = "enroll_stat_{}.h5".format(self.NUM_GAUSSIANS)
            enroll_stat.write(os.path.join(self.BASE_DIR, "stat", filename))
Example #13
from collections import OrderedDict
from fuel.datasets import IndexableDataset
from fuel.schemes import (SequentialScheme, ShuffledScheme,
                          SequentialExampleScheme, ShuffledExampleScheme)
from fuel.schemes import ConstantScheme
from fuel.transformers import Mapping, Batch, Padding, Filter, Unpack
from fuel.streams import DataStream

import sidekit
import sympy
""" ------------------ Set a few parameters here --------------------"""
Test = False
""" ---------------------------------------------------------------- """

# LOAD STATSERVER OF I-VECTORS
ivss = sidekit.StatServer("iv_sre04050608_m_training_tandem.h5")
mu = ivss.get_mean_stat1()
std = ivss.get_total_covariance_stat1()
ivss.whiten_stat1(mu, std)
ivss.norm_stat1()

# GET LIST OF UNIQUE SPEAKERS WITH NUMBER OF SESSIONS FOR EACH
unique_spk = set(ivss.modelset.tolist())
speakers = []
sess = []
for spk in unique_spk:
    speakers.append((spk, (ivss.modelset == spk).sum()))
    sess.append((ivss.modelset == spk).sum())

# COMPUTE THE NUMBER OF POSSIBLE UNIQUE TARGET TRIPLETS
positive_example = int(0)
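
The snippet stops here; a hedged sketch of one possible reading — a "target triplet" taken as three distinct sessions of the same speaker, counted with the sympy import above. This interpretation is an assumption, not taken from the source.

for n in sess:
    # assumption: count C(n, 3) same-speaker session triples
    positive_example += int(sympy.binomial(int(n), 3))
print("possible unique target triplets:", positive_example)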
Example #14
    # %%
    #################################################################
    # Train the Universal background Model (UBM)
    #################################################################
    print('Train the UBM by EM')
    ubm = sidekit.Mixture()
    llk = ubm.EM_split(fs, ubmList, distribNb, numThread=nbThread)
    ubm.save_pickle('gmm/ubm_bnf.p')

    # %%
    #################################################################
    # Compute the sufficient statistics on the UBM
    #################################################################
    print('Compute the sufficient statistics')
    # Create a StatServer for the enrollment data and compute the statistics
    enroll_stat = sidekit.StatServer(enroll_idmap, ubm)
    enroll_stat.accumulate_stat(ubm=ubm,
                                feature_server=fs,
                                seg_indices=range(enroll_stat.segset.shape[0]),
                                numThread=nbThread)
    enroll_stat.save('data/stat_sre10_coreX-coreX_m_enroll_bnf.h5')

    nap_stat = sidekit.StatServer(nap_idmap, ubm)
    nap_stat.accumulate_stat(ubm=ubm,
                             feature_server=fs,
                             seg_indices=range(nap_stat.segset.shape[0]),
                             numThread=nbThread)
    nap_stat.save('data/stat_sre04050608_m_training_bnf.h5')

    test_stat = sidekit.StatServer(test_idmap, ubm)
    test_stat.accumulate_stat(ubm=ubm,
Example #15
#!coding=utf-8
import sidekit
from utils import BasicUtils
basic_ops = BasicUtils()

enroll_ivecs_stat = sidekit.StatServer("./exp/enroll_ivecs_stat", distrib_nb=512, feature_size=63)
test_ivecs_stat = sidekit.StatServer("./exp/test_ivecs_stat", distrib_nb=512, feature_size=63)

sts_per_model = enroll_ivecs_stat.mean_stat_per_model()
spk_list = sts_per_model.modelset

mean_ivecs = sts_per_model.stat1
test_ivecs = test_ivecs_stat.stat1

print(spk_list.shape, mean_ivecs.shape, test_ivecs.shape)

test_utts = test_ivecs_stat.segset
print(test_utts.shape)

result_lines = []
for k in range(len(test_utts)):
    uttId = test_utts[k]
    uttId_ivec = test_ivecs[k]
    temp_scores = []
    for i in range(len(spk_list)):
        cos = basic_ops.compute_cosine(uttId_ivec, mean_ivecs[i])
        temp_scores.append(cos)
    max_score = max(temp_scores)
    max_score_index = temp_scores.index(max_score)
    result_spk = spk_list[max_score_index]
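    # Hedged continuation, not in the original snippet: record the decision for
    # this utterance; the "uttId spkId score" line format is an assumption.
    result_lines.append("{} {} {:.4f}".format(uttId, result_spk, max_score))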
Example #16
    def __create_stats(self):
        # Read tv_idmap, and plda_idmap
        tv_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "tv_idmap.h5"))
        plda_idmap = sidekit.IdMap.read(
            os.path.join(self.BASE_DIR, "task", "plda_idmap.h5"))
        # Create a joint StatServer for TV and PLDA training data
        back_idmap = plda_idmap.merge(tv_idmap)
        if not back_idmap.validate():
            raise RuntimeError("Error merging tv_idmap & plda_idmap")

        # Load UBM
        model_name = "ubm_{}.h5".format(self.NUM_GUASSIANS)
        ubm = sidekit.Mixture()
        ubm.read(os.path.join(self.BASE_DIR, "ubm", model_name))
        back_stat = sidekit.StatServer(statserver_file_name=back_idmap,
                                       ubm=ubm)
        # Create Feature Server
        fs = self.createFeatureServer()

        # Jointly compute the sufficient statistics of TV and PLDA data
        back_filename = 'back_stat_{}.h5'.format(self.NUM_GUASSIANS)
        if not os.path.isfile(
                os.path.join(self.BASE_DIR, "stat", back_filename)):
            # BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
            back_stat.accumulate_stat(ubm=ubm,
                                      feature_server=fs,
                                      seg_indices=range(
                                          back_stat.segset.shape[0]))
            back_stat.write(os.path.join(self.BASE_DIR, "stat", back_filename))

        # Load the sufficient statistics from TV training data
        tv_filename = 'tv_stat_{}.h5'.format(self.NUM_GUASSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat",
                                           tv_filename)):
            tv_stat = sidekit.StatServer.read_subset(
                os.path.join(self.BASE_DIR, "stat", back_filename), tv_idmap)
            tv_stat.write(os.path.join(self.BASE_DIR, "stat", tv_filename))

        # Load sufficient statistics and extract i-vectors from PLDA training data
        plda_filename = 'plda_stat_{}.h5'.format(self.NUM_GUASSIANS)
        if not os.path.isfile(
                os.path.join(self.BASE_DIR, "stat", plda_filename)):
            plda_stat = sidekit.StatServer.read_subset(
                os.path.join(self.BASE_DIR, "stat", back_filename), plda_idmap)
            plda_stat.write(os.path.join(self.BASE_DIR, "stat", plda_filename))

        # Load sufficient statistics from test data
        filename = 'test_stat_{}.h5'.format(self.NUM_GUASSIANS)
        if not os.path.isfile(os.path.join(self.BASE_DIR, "stat", filename)):
            test_idmap = sidekit.IdMap.read(
                os.path.join(self.BASE_DIR, "task", "test_idmap.h5"))
            test_stat = sidekit.StatServer(statserver_file_name=test_idmap,
                                           ubm=ubm)
            # Create Feature Server
            fs = self.createFeatureServer()
            # Jointly compute the sufficient statistics of TV and PLDA data
            # BUG: don't use self.NUM_THREADS when assigning num_thread as it's prone to race conditions
            test_stat.accumulate_stat(ubm=ubm,
                                      feature_server=fs,
                                      seg_indices=range(
                                          test_stat.segset.shape[0]))
            test_stat.write(os.path.join(self.BASE_DIR, "stat", filename))