示例#1
0
def gmm_estep(algorithm, extractor, iteration, indices, force=False):
  """Runs one E-step of the GMM (UBM) training on a slice of the training features (parallel).

  ``indices`` is a ``(first, last)`` pair; the features at list positions
  ``first`` (inclusive) up to ``last`` (exclusive) are stacked and handed to the
  E-step of ``algorithm.ubm_trainer`` together with the GMM machine of
  ``iteration``.  The resulting GMM statistics are written to the statistics
  file of that range.

  Nothing is done for an empty range.  The step is also skipped when
  ``utils.check_file`` accepts either the statistics file or the machine file
  of the next iteration.
  """
  if indices[0] >= indices[1]:
    # empty slice: there is nothing to accumulate
    return

  first, last = indices[0], indices[1]
  fs = FileSelector.instance()

  stats_file = fs.gmm_stats_file(iteration, first, last)
  new_machine_file = fs.gmm_intermediate_file(iteration + 1)

  already_done = utils.check_file(stats_file, force, 1000) or utils.check_file(new_machine_file, force, 1000)
  if already_done:
    logger.info("UBM training: Skipping GMM E-Step since the file '%s' or '%s' already exists", stats_file, new_machine_file)
    return

  training_list = fs.training_list('extracted', 'train_projector')
  current_machine_file = fs.gmm_intermediate_file(iteration)
  gmm_machine = bob.learn.em.GMMMachine(bob.io.base.HDF5File(current_machine_file))

  logger.info("UBM training: GMM E-Step from range(%d, %d)", *indices)

  # load the features of the selected slice and stack them into one array
  features = []
  for index in range(first, last):
    features.append(read_feature(extractor, training_list[index]))
  data = numpy.vstack(features)

  trainer = algorithm.ubm_trainer
  trainer.initialize(gmm_machine, None)

  # run the E-step, then fetch the statistics accumulated by the trainer
  algorithm.ubm_trainer.e_step(gmm_machine, data)
  gmm_stats = algorithm.ubm_trainer.gmm_statistics

  # store the statistics of this slice
  bob.io.base.create_directories_safe(os.path.dirname(stats_file))
  gmm_stats.save(bob.io.base.HDF5File(stats_file, 'w'))
  logger.info("UBM training: Wrote GMM stats '%s'", stats_file)
示例#2
0
def kmeans_initialize(algorithm, extractor, limit_data = None, force = False, allow_missing_files = False):
  """Creates the initial K-Means machine of the UBM training (non-parallel).

  The training features (optionally restricted through ``limit_data``) are
  stacked, a ``KMeansMachine`` is constructed from ``algorithm.gaussians`` and
  ``data.shape[1]``, and ``algorithm.kmeans_trainer`` initializes it.  The
  machine is written to the K-Means intermediate file of iteration 0.
  The step is skipped when ``utils.check_file`` accepts that file.
  """
  fs = FileSelector.instance()
  output_file = fs.kmeans_intermediate_file(0)

  if utils.check_file(output_file, force, 1000):
    logger.info("UBM training: Skipping KMeans initialization since the file '%s' already exists", output_file)
    return

  logger.info("UBM training: initializing kmeans")
  all_files = fs.training_list('extracted', 'train_projector')
  training_list = utils.selected_elements(all_files, limit_data)

  # load the selected features and stack them
  load = functools.partial(read_feature, extractor)
  data = utils.vstack_features(load, training_list, allow_missing_files=allow_missing_files)

  kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians, data.shape[1])

  # Re-create the pseudo random number generator from the seed, so that serial
  # and parallel execution start from the same initialization.
  algorithm.rng = bob.core.random.mt19937(algorithm.init_seed)
  algorithm.kmeans_trainer.initialize(kmeans_machine, data, algorithm.rng)

  # store the initial machine
  output_dir = os.path.dirname(output_file)
  bob.io.base.create_directories_safe(output_dir)
  kmeans_machine.save(bob.io.base.HDF5File(output_file, 'w'))
  logger.info("UBM training: saved initial KMeans machine to '%s'", output_file)
示例#3
0
def lda_project(algorithm, indices, force=False, allow_missing_files=False):
    """Projects a slice of the whitened training i-vectors with the LDA.

    The LDA is loaded from ``fs.lda_file``.  For every list position from
    ``indices[0]`` (inclusive) to ``indices[1]`` (exclusive) the whitened file is
    read, projected with ``algorithm.project_lda`` and saved to the matching
    'lda_projected' file.  Entries whose output passes ``utils.check_file``, or
    whose input is dropped by ``utils.filter_missing_files``, are left out.
    """
    fs = FileSelector.instance()
    algorithm.load_lda(fs.lda_file)

    whitened_files = fs.training_list('whitened', 'train_projector')
    lda_projected_files = fs.training_list('lda_projected', 'train_projector')

    logger.info(
        "IVector training: LDA projection range (%d, %d) from '%s' to '%s'",
        indices[0], indices[1], fs.directories['whitened'],
        fs.directories['lda_projected'])

    for position in range(indices[0], indices[1]):
        source_file = whitened_files[position]
        target_file = lda_projected_files[position]

        # output is already there
        if utils.check_file(target_file, force):
            continue

        # input is missing (and missing files are tolerated)
        available = utils.filter_missing_files(
            [source_file],
            split_by_client=False,
            allow_missing_files=allow_missing_files)
        if len(available) == 0:
            continue

        # load, project, write
        ivector = algorithm.read_feature(source_file)
        projected = algorithm.project_lda(ivector)
        target_dir = os.path.dirname(target_file)
        bob.io.base.create_directories_safe(target_dir)
        bob.bio.base.save(projected, target_file)
示例#4
0
def train_isv(algorithm, force=False, allow_missing_files=False):
    """Trains the ISV projector/enroller from the projected GMM statistics.

    The UBM is loaded from ``fs.ubm_file``, the 'projected_gmm' training files
    are read client by client, ``algorithm.train_isv`` is run on them and the
    projector is saved to ``fs.projector_file``.  The step is skipped when
    ``utils.check_file`` accepts the projector file.
    """
    fs = FileSelector.instance()

    if utils.check_file(fs.projector_file, force, 800):
        logger.info(
            "ISV training: Skipping ISV training since '%s' already exists",
            fs.projector_file)
        return

    # the ISV class needs the UBM
    algorithm.load_ubm(fs.ubm_file)

    # list of training files, grouped by client
    files_by_client = fs.training_list('projected_gmm',
                                       'train_projector',
                                       arrange_by_client=True)
    files_by_client = utils.filter_missing_files(
        files_by_client,
        split_by_client=True,
        allow_missing_files=allow_missing_files)

    # read the GMM statistics, keeping the per-client grouping
    train_gmm_stats = []
    for client_files in files_by_client:
        client_stats = []
        for filename in client_files:
            client_stats.append(algorithm.read_gmm_stats(filename))
        train_gmm_stats.append(client_stats)

    logger.info("ISV training: training ISV with %d clients",
                len(train_gmm_stats))
    algorithm.train_isv(train_gmm_stats)

    # store the trained projector
    projector_dir = os.path.dirname(fs.projector_file)
    bob.io.base.create_directories_safe(projector_dir)
    algorithm.save_projector(fs.projector_file)
示例#5
0
def gmm_initialize(algorithm, extractor, limit_data = None, force = False, allow_missing_files = False):
  """Builds the initial GMM from the result of the K-Means training (non-parallel).

  All selected training features are stacked in memory, so this step might
  require a lot of memory.  Means are taken from the K-Means machine stored in
  ``fs.kmeans_file``; variances and weights are computed by that machine from
  the stacked data.  The GMM is written to the GMM intermediate file of
  iteration 0.  The step is skipped when ``utils.check_file`` accepts that file.
  """
  fs = FileSelector.instance()
  output_file = fs.gmm_intermediate_file(0)

  if utils.check_file(output_file, force, 800):
    logger.info("UBM Training: Skipping GMM initialization since '%s' already exists", output_file)
    return

  logger.info("UBM Training: Initializing GMM")

  all_files = fs.training_list('extracted', 'train_projector')
  training_list = utils.selected_elements(all_files, limit_data)

  # load the selected features and stack them
  load = functools.partial(read_feature, extractor)
  data = utils.vstack_features(load, training_list, allow_missing_files=allow_missing_files)

  # per-cluster variances and weights of the trained K-Means machine
  kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_file))
  variances, weights = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

  # set up the GMM with the K-Means result
  gmm_machine = bob.learn.em.GMMMachine(algorithm.gaussians, data.shape[1])
  gmm_machine.means = kmeans_machine.means
  gmm_machine.variances = variances
  gmm_machine.weights = weights
  gmm_machine.set_variance_thresholds(algorithm.variance_threshold)

  # store the initial GMM
  output_dir = os.path.dirname(output_file)
  bob.io.base.create_directories_safe(output_dir)
  gmm_machine.save(bob.io.base.HDF5File(output_file, 'w'))
  logger.info("UBM Training: Wrote GMM file '%s'", output_file)
示例#6
0
def wccn_project(algorithm, indices, force=False):
  """Projects a slice of the training i-vectors with the WCCN.

  The WCCN is loaded from ``fs.wccn_file``.  Input files are the
  'lda_projected' ones when ``algorithm.use_lda`` is set and the 'whitened'
  ones otherwise.  Every list position from ``indices[0]`` (inclusive) to
  ``indices[1]`` (exclusive) is projected with ``algorithm.project_wccn`` and
  saved to the matching 'wccn_projected' file, unless ``utils.check_file``
  accepts that output file.
  """
  fs = FileSelector.instance()
  algorithm.load_wccn(fs.wccn_file)

  input_label = 'lda_projected' if algorithm.use_lda else 'whitened'

  input_files = fs.training_list(input_label, 'train_projector')
  wccn_projected_files = fs.training_list('wccn_projected', 'train_projector')

  logger.info("IVector training: WCCN projection range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories[input_label], fs.directories['wccn_projected'])

  for position in range(indices[0], indices[1]):
    source_file = input_files[position]
    target_file = wccn_projected_files[position]

    # output is already there
    if utils.check_file(target_file, force):
      continue

    # load, project, write
    ivector = algorithm.read_feature(source_file)
    projected = algorithm.project_wccn(ivector)
    target_dir = os.path.dirname(target_file)
    bob.io.base.create_directories_safe(target_dir)
    bob.bio.base.save(projected, target_file)
示例#7
0
def kmeans_initialize(algorithm, extractor, limit_data=None, force=False):
    """Creates the initial K-Means machine of the UBM training (non-parallel).

    The training features (optionally restricted through ``limit_data``) are
    stacked, a ``KMeansMachine`` is constructed from ``algorithm.gaussians`` and
    ``data.shape[1]``, and ``algorithm.kmeans_trainer`` initializes it with
    ``algorithm.rng``.  The machine is written to the K-Means intermediate file
    of iteration 0.  The step is skipped when ``utils.check_file`` accepts that
    file.
    """
    fs = FileSelector.instance()
    output_file = fs.kmeans_intermediate_file(0)

    if utils.check_file(output_file, force, 1000):
        logger.info(
            "UBM training: Skipping KMeans initialization since the file '%s' already exists",
            output_file)
        return

    logger.info("UBM training: initializing kmeans")
    all_files = fs.training_list('extracted', 'train_projector')
    training_list = utils.selected_elements(all_files, limit_data)

    # load every selected feature file and stack the results
    features = []
    for feature_file in training_list:
        features.append(read_feature(extractor, feature_file))
    data = numpy.vstack(features)

    # set up the machine and let the trainer initialize it
    kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians,
                                                data.shape[1])
    algorithm.kmeans_trainer.initialize(kmeans_machine, data, algorithm.rng)

    # store the initial machine
    output_dir = os.path.dirname(output_file)
    bob.io.base.create_directories_safe(output_dir)
    kmeans_machine.save(bob.io.base.HDF5File(output_file, 'w'))
    logger.info("UBM training: saved initial KMeans machine to '%s'",
                output_file)
示例#8
0
def train_plda(algorithm, force=False, allow_missing_files=False):
    """Trains the PLDA on the projected training features, grouped by client.

    The input features are the 'wccn_projected' ones when ``algorithm.use_wccn``
    is set, else the 'lda_projected' ones when ``algorithm.use_lda`` is set, and
    the 'whitened' ones otherwise.  After ``algorithm.train_plda`` has run,
    ``algorithm.plda_base`` is saved to ``fs.plda_file``.  The step is skipped
    when ``utils.check_file`` accepts that file.
    """
    fs = FileSelector.instance()

    if utils.check_file(fs.plda_file, force, 1000):
        logger.info("- PLDA projector '%s' already exists.", fs.plda_file)
        return

    # pick the output of the last projection stage that is enabled
    input_label = ('wccn_projected' if algorithm.use_wccn else
                   'lda_projected' if algorithm.use_lda else
                   'whitened')

    files_by_client = fs.training_list(input_label,
                                       'train_projector',
                                       arrange_by_client=True)
    files_by_client = utils.filter_missing_files(
        files_by_client,
        split_by_client=True,
        allow_missing_files=allow_missing_files)

    # load the features, keeping the per-client grouping
    train_features = []
    for client_files in files_by_client:
        client_features = []
        for filename in client_files:
            client_features.append(bob.bio.base.load(filename))
        train_features.append(client_features)

    # train and store the PLDA base
    algorithm.train_plda(train_features)
    plda_dir = os.path.dirname(fs.plda_file)
    bob.io.base.create_directories_safe(plda_dir)
    bob.bio.base.save(algorithm.plda_base, fs.plda_file)
示例#9
0
def gmm_initialize(algorithm, extractor, limit_data = None, force = False):
  """Builds the initial GMM from the result of the K-Means training (non-parallel).

  All selected training features are stacked in memory, so this step might
  require a lot of memory.  Means are taken from the K-Means machine stored in
  ``fs.kmeans_file``; variances and weights are computed by that machine from
  the stacked data.  The GMM is written to the GMM intermediate file of
  iteration 0.  The step is skipped when ``utils.check_file`` accepts that file.
  """
  fs = FileSelector.instance()
  output_file = fs.gmm_intermediate_file(0)

  if utils.check_file(output_file, force, 800):
    logger.info("UBM Training: Skipping GMM initialization since '%s' already exists", output_file)
    return

  logger.info("UBM Training: Initializing GMM")

  # load every selected feature file and stack the results
  all_files = fs.training_list('extracted', 'train_projector')
  training_list = utils.selected_elements(all_files, limit_data)
  features = []
  for feature_file in training_list:
    features.append(read_feature(extractor, feature_file))
  data = numpy.vstack(features)

  # per-cluster variances and weights of the trained K-Means machine
  kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_file))
  variances, weights = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

  # set up the GMM with the K-Means result
  gmm_machine = bob.learn.em.GMMMachine(algorithm.gaussians, data.shape[1])
  gmm_machine.means = kmeans_machine.means
  gmm_machine.variances = variances
  gmm_machine.weights = weights
  gmm_machine.set_variance_thresholds(algorithm.variance_threshold)

  # store the initial GMM
  output_dir = os.path.dirname(output_file)
  bob.io.base.create_directories_safe(output_dir)
  gmm_machine.save(bob.io.base.HDF5File(output_file, 'w'))
  logger.info("UBM Training: Wrote GMM file '%s'", output_file)
示例#10
0
def gmm_project(algorithm, extractor, indices, force=False):
    """Projects a slice of the extracted training features with the UBM.

    The UBM is loaded from ``fs.ubm_file``.  Every list position from
    ``indices[0]`` (inclusive) to ``indices[1]`` (exclusive) is read with
    ``read_feature``, projected with ``algorithm.project_ubm`` and saved to the
    matching 'projected_gmm' file, unless ``utils.check_file`` accepts that
    output file.
    """
    fs = FileSelector.instance()
    algorithm.load_ubm(fs.ubm_file)

    feature_files = fs.training_list('extracted', 'train_projector')
    projected_files = fs.training_list('projected_gmm', 'train_projector')

    logger.info(
        "ISV training: Project features range (%d, %d) from '%s' to '%s'",
        indices[0], indices[1], fs.directories['extracted'],
        fs.directories['projected_gmm'])

    for position in range(indices[0], indices[1]):
        source_file = feature_files[position]
        target_file = projected_files[position]

        # output is already there
        if utils.check_file(target_file, force):
            continue

        # load, project, write
        feature = read_feature(extractor, source_file)
        projected = algorithm.project_ubm(feature)
        target_dir = os.path.dirname(target_file)
        bob.io.base.create_directories_safe(target_dir)
        bob.bio.base.save(projected, target_file)
示例#11
0
def ivector_estep(algorithm,
                  iteration,
                  indices,
                  force=False,
                  allow_missing_files=False):
    """Runs one E-step of the IVector training on a slice of the training data (parallel).

    The GMM statistics at list positions ``indices[0]`` (inclusive) to
    ``indices[1]`` (exclusive) are read and passed to the E-step of
    ``algorithm.ivector_trainer``.  For ``iteration`` 0 a new IVector machine is
    created and initialized by the trainer; otherwise the machine of that
    iteration is loaded from its intermediate file.  The trainer's accumulators
    and the size of the slice are written to the statistics file of the range.
    The step is skipped when ``utils.check_file`` accepts that file.
    """
    fs = FileSelector.instance()
    stats_file = fs.ivector_stats_file(iteration, indices[0], indices[1])

    if utils.check_file(stats_file, force, 1000):
        logger.info(
            "IVector training: Skipping IVector E-Step since the file '%s' already exists",
            stats_file)
        return

    logger.info("IVector training: E-Step from range(%d, %d)", *indices)

    # the UBM is needed by the IVector machine
    algorithm.load_ubm(fs.ubm_file)
    trainer = algorithm.ivector_trainer

    if not iteration:
        # first round: start from a fresh machine, initialized by the trainer
        tv = bob.learn.em.IVectorMachine(algorithm.ubm,
                                         algorithm.subspace_dimension_of_t,
                                         algorithm.variance_threshold)
        trainer.initialize(tv)
    else:
        # later rounds: continue from the machine written for this iteration
        machine_file = fs.ivector_intermediate_file(iteration)
        tv = bob.learn.em.IVectorMachine(bob.io.base.HDF5File(machine_file))
        tv.ubm = algorithm.ubm

    # select the files of this slice and read their GMM statistics
    all_files = fs.training_list('projected_gmm', 'train_projector')
    selected_files = [all_files[i] for i in range(indices[0], indices[1])]
    selected_files = utils.filter_missing_files(
        selected_files,
        split_by_client=False,
        allow_missing_files=allow_missing_files)
    data = []
    for stats_filename in selected_files:
        data.append(algorithm.read_gmm_stats(stats_filename))

    trainer.e_step(tv, data)

    # store the accumulators together with the size of the slice
    bob.io.base.create_directories_safe(os.path.dirname(stats_file))
    hdf5 = bob.io.base.HDF5File(stats_file, 'w')
    for accumulator in ('acc_nij_wij2', 'acc_fnormij_wij', 'acc_nij',
                        'acc_snormij'):
        hdf5.set(accumulator, getattr(trainer, accumulator))
    hdf5.set('nsamples', indices[1] - indices[0])
    logger.info("IVector training: Wrote Stats file '%s'", stats_file)
示例#12
0
def ivector_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Performs a single M-step of the IVector algorithm (non-parallel).

  The E-step statistics of ``iteration`` are loaded -- from one file covering
  the whole training list if it exists, otherwise from the per-job files of
  ``number_of_parallel_jobs`` jobs -- assigned to the accumulators of
  ``algorithm.ivector_trainer``, and the M-step is run.  The updated machine is
  written to the intermediate file of ``iteration + 1``.  This part is skipped
  when ``utils.check_file`` accepts that file.

  Independently of the skip:

  * after the last iteration (``algorithm.tv_training_iterations - 1``) the new
    machine file is copied to ``fs.tv_file``;
  * with ``clean`` set and ``iteration > 0``, the directory holding the
    intermediate file of ``iteration - 1`` is removed.
  """
  fs = FileSelector.instance()

  old_machine_file = fs.ivector_intermediate_file(iteration)
  new_machine_file = fs.ivector_intermediate_file(iteration + 1)

  if utils.check_file(new_machine_file, force, 1000):
    logger.info("IVector training: Skipping IVector M-Step since the file '%s' already exists", new_machine_file)
  else:
    # get the files from e-step
    training_list = fs.training_list('projected_gmm', 'train_projector')
    single_stats_file = fs.ivector_stats_file(iteration, 0, len(training_list))

    # try if there is one file containing all data
    if os.path.exists(single_stats_file):
      # This is a module-level function, so there is no ``self``; the reader is
      # called directly.
      statistics = _read_stats(single_stats_file)
    else:
      # load several files, one per parallel job
      stats_files = []
      for job in range(number_of_parallel_jobs):
        job_indices = tools.indices(training_list, number_of_parallel_jobs, job+1)
        if job_indices[-1] >= job_indices[0]:
          stats_files.append(fs.ivector_stats_file(iteration, job_indices[0], job_indices[-1]))
      # read all stats files
      statistics = _accumulate(stats_files)

    # Load machine: continue from the previous one, or start fresh at iteration 0
    algorithm.load_ubm(fs.ubm_file)
    if iteration:
      tv = bob.learn.em.IVectorMachine(bob.io.base.HDF5File(old_machine_file))
      tv.ubm = algorithm.ubm
    else:
      tv = bob.learn.em.IVectorMachine(algorithm.ubm, algorithm.subspace_dimension_of_t, algorithm.variance_threshold)

    # Hand the accumulated statistics to the IVectorTrainer
    trainer = algorithm.ivector_trainer
    trainer.reset_accumulators(tv)
    trainer.acc_nij_wij2 = statistics[0]
    trainer.acc_fnormij_wij = statistics[1]
    trainer.acc_nij = statistics[2]
    trainer.acc_snormij = statistics[3]
    trainer.m_step(tv) # data is not used in M-step
    logger.info("IVector training: Performed M step %d", iteration)

    # Save the IVector model
    bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
    tv.save(bob.io.base.HDF5File(new_machine_file, 'w'))
    logger.info("IVector training: Wrote new IVector machine '%s'", new_machine_file)

  # after the last iteration, publish the machine as the final TV file
  if iteration == algorithm.tv_training_iterations-1:
    shutil.copy(new_machine_file, fs.tv_file)
    logger.info("IVector training: Wrote new TV matrix '%s'", fs.tv_file)

  # optionally drop the directory of the iteration before the current one
  if clean and iteration > 0:
    old_dir = os.path.dirname(fs.ivector_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", old_dir)
    shutil.rmtree(old_dir)
示例#13
0
def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Runs one M-step of the GMM (UBM) training (non-parallel).

  The GMM statistics of ``iteration`` are loaded -- from one file covering the
  whole training list if it exists, otherwise summed over the per-job files of
  ``number_of_parallel_jobs`` jobs -- assigned to ``algorithm.ubm_trainer``, and
  the M-step is applied to the machine of ``iteration``.  The result is written
  to the intermediate file of ``iteration + 1``.  This part is skipped when
  ``utils.check_file`` accepts that file.

  Independently of the skip, the new machine file is copied to ``fs.ubm_file``
  after the last iteration, and with ``clean`` set and ``iteration > 0`` the
  directory of the intermediate file of ``iteration - 1`` is removed.
  """
  fs = FileSelector.instance()

  old_machine_file = fs.gmm_intermediate_file(iteration)
  new_machine_file = fs.gmm_intermediate_file(iteration + 1)

  if not utils.check_file(new_machine_file, force, 1000):
    # the E-step was run on these files
    training_list = fs.training_list('extracted', 'train_projector')
    n_files = len(training_list)

    if os.path.exists(fs.gmm_stats_file(iteration, 0, n_files)):
      # one file holds the statistics of the whole list
      single_stats_file = fs.gmm_stats_file(iteration, 0, n_files)
      gmm_stats = bob.learn.em.GMMStats(bob.io.base.HDF5File(single_stats_file))
    else:
      # collect the statistics files of all non-empty jobs
      stats_files = []
      for job_id in range(1, number_of_parallel_jobs + 1):
        job_indices = tools.indices(training_list, number_of_parallel_jobs, job_id)
        if job_indices[-1] > job_indices[0]:
          stats_files.append(fs.gmm_stats_file(iteration, job_indices[0], job_indices[-1]))

      # sum them up, starting from the first one
      gmm_stats = bob.learn.em.GMMStats(bob.io.base.HDF5File(stats_files[0]))
      for further_file in stats_files[1:]:
        gmm_stats += bob.learn.em.GMMStats(bob.io.base.HDF5File(further_file))

    # machine of the current iteration
    gmm_machine = bob.learn.em.GMMMachine(bob.io.base.HDF5File(old_machine_file))

    # give the trainer the accumulated statistics and run the M-step (no data required)
    trainer = algorithm.ubm_trainer
    trainer.initialize(gmm_machine)
    trainer.gmm_statistics = gmm_stats
    trainer.m_step(gmm_machine)

    # store the updated machine
    bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
    gmm_machine.save(bob.io.base.HDF5File(new_machine_file, 'w'))
  else:
    logger.info("UBM training: Skipping GMM M-Step since the file '%s' already exists", new_machine_file)

  # Write the final UBM file after the last iteration
  # TODO: implement other stopping criteria
  last_iteration = algorithm.gmm_training_iterations-1
  if iteration == last_iteration:
    shutil.copy(new_machine_file, fs.ubm_file)
    logger.info("UBM training: Wrote new UBM '%s'", fs.ubm_file)

  if clean and iteration > 0:
    obsolete_dir = os.path.dirname(fs.gmm_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", obsolete_dir)
    shutil.rmtree(obsolete_dir)
示例#14
0
def isv_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Performs a single M-step of the ISV algorithm (non-parallel).

  The E-step statistics of ``iteration`` are loaded -- from one file covering
  the whole (client-arranged) training list if it exists, otherwise from the
  per-job files of ``number_of_parallel_jobs`` jobs -- assigned to the
  accumulators of ``algorithm.isv_trainer``, and the M-step is run.  The
  updated ISV base is written to the intermediate file of ``iteration + 1``.
  This part is skipped when ``utils.check_file`` accepts that file.

  Independently of the skip:

  * after the last iteration (``algorithm.isv_training_iterations - 1``) the
    new machine file is copied to ``fs.isv_file``;
  * with ``clean`` set and ``iteration > 0``, the directory holding the
    intermediate file of ``iteration - 1`` is removed.
  """
  fs = FileSelector.instance()

  old_machine_file = fs.isv_intermediate_file(iteration)
  new_machine_file = fs.isv_intermediate_file(iteration + 1)

  if utils.check_file(new_machine_file, force, 1000):
    logger.info("ISV training: Skipping ISV M-Step since the file '%s' already exists", new_machine_file)
  else:
    # get the files from e-step
    training_list = fs.training_list('projected_gmm', 'train_projector', arrange_by_client=True)
    single_stats_file = fs.isv_stats_file(iteration, 0, len(training_list))

    # try if there is one file containing all data
    if os.path.exists(single_stats_file):
      # load stats file
      statistics = _read_stats(single_stats_file)
    else:
      # load several files, one per parallel job
      stats_files = []
      for job in range(number_of_parallel_jobs):
        job_indices = tools.indices(training_list, number_of_parallel_jobs, job+1)
        if job_indices[-1] >= job_indices[0]:
          stats_files.append(fs.isv_stats_file(iteration, job_indices[0], job_indices[-1]))
      # read all stats files
      statistics = _accumulate(stats_files)

    # Load machine: continue from the previous one, or start fresh at iteration 0
    algorithm.load_ubm(fs.ubm_file)
    if iteration:
      isv_base = bob.learn.em.ISVBase(bob.io.base.HDF5File(old_machine_file))
      isv_base.ubm = algorithm.ubm
    else:
      isv_base = bob.learn.em.ISVBase(algorithm.ubm, algorithm.subspace_dimension_of_u)

    # Gets the ISV trainer and hands it the accumulated statistics
    trainer = algorithm.isv_trainer
    # Loading data just to allocate memory.
    # NOTE(review): the list is arranged by client, so ``training_list[0]`` is
    # presumably the file list of the first client -- confirm that
    # ``read_gmm_stats`` accepts it.
    data = [algorithm.read_gmm_stats(training_list[0])]
    trainer.initialize(isv_base, data) # just to allocate memory
    trainer.acc_u_a1 = statistics[0]
    trainer.acc_u_a2 = statistics[1]
    trainer.m_step(isv_base) # data is not used in M-step
    logger.info("ISV training: Performed M step %d", iteration)

    # Save the ISV model
    bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
    isv_base.save(bob.io.base.HDF5File(new_machine_file, 'w'))
    logger.info("ISV training: Wrote new ISV Base '%s'", new_machine_file)

  # after the last iteration, publish the machine as the final ISV file
  if iteration == algorithm.isv_training_iterations-1:
    shutil.copy(new_machine_file, fs.isv_file)
    # the file written here is the ISV file, not a TV matrix
    logger.info("ISV training: Wrote new ISV file '%s'", fs.isv_file)

  # optionally drop the directory of the iteration before the current one
  if clean and iteration > 0:
    old_dir = os.path.dirname(fs.isv_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", old_dir)
    shutil.rmtree(old_dir)
示例#15
0
def gmm_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Applies one M-step of the GMM (UBM) training (non-parallel).

  Statistics of ``iteration`` come from a single file spanning the whole
  training list when that file exists; otherwise the per-job files of
  ``number_of_parallel_jobs`` jobs are summed.  They are assigned to
  ``algorithm.ubm_trainer``, the M-step updates the machine of ``iteration``,
  and the result is saved as the intermediate file of ``iteration + 1``.  This
  part is skipped when ``utils.check_file`` accepts that file.

  In any case, the new machine file is copied to ``fs.ubm_file`` after the last
  iteration, and with ``clean`` set and ``iteration > 0`` the directory of the
  intermediate file of ``iteration - 1`` is deleted.
  """
  fs = FileSelector.instance()

  old_machine_file = fs.gmm_intermediate_file(iteration)
  new_machine_file = fs.gmm_intermediate_file(iteration + 1)

  must_compute = not utils.check_file(new_machine_file, force, 1000)
  if must_compute:
    # files that were processed by the E-step
    training_list = fs.training_list('extracted', 'train_projector')

    if os.path.exists(fs.gmm_stats_file(iteration, 0, len(training_list))):
      # everything is in one statistics file
      whole_stats_file = fs.gmm_stats_file(iteration, 0, len(training_list))
      gmm_stats = bob.learn.em.GMMStats(bob.io.base.HDF5File(whole_stats_file))
    else:
      # gather the statistics file of each job with a non-empty range
      stats_files = []
      for job in range(number_of_parallel_jobs):
        bounds = tools.indices(training_list, number_of_parallel_jobs, job+1)
        if bounds[-1] > bounds[0]:
          stats_files.append(fs.gmm_stats_file(iteration, bounds[0], bounds[-1]))

      # accumulate: first file as the start value, the others added on top
      gmm_stats = bob.learn.em.GMMStats(bob.io.base.HDF5File(stats_files[0]))
      for extra_file in stats_files[1:]:
        gmm_stats += bob.learn.em.GMMStats(bob.io.base.HDF5File(extra_file))

    # the machine to update
    gmm_machine = bob.learn.em.GMMMachine(bob.io.base.HDF5File(old_machine_file))

    # prepare the trainer with the statistics, then run the M-step (no data required)
    trainer = algorithm.ubm_trainer
    trainer.initialize(gmm_machine)
    trainer.gmm_statistics = gmm_stats
    trainer.m_step(gmm_machine)

    # write the updated machine
    new_machine_dir = os.path.dirname(new_machine_file)
    bob.io.base.create_directories_safe(new_machine_dir)
    gmm_machine.save(bob.io.base.HDF5File(new_machine_file, 'w'))
  else:
    logger.info("UBM training: Skipping GMM M-Step since the file '%s' already exists", new_machine_file)

  # Write the final UBM file after the last iteration
  # TODO: implement other stopping criteria
  if iteration == algorithm.gmm_training_iterations-1:
    shutil.copy(new_machine_file, fs.ubm_file)
    logger.info("UBM training: Wrote new UBM '%s'", fs.ubm_file)

  if clean and iteration > 0:
    previous_dir = os.path.dirname(fs.gmm_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", previous_dir)
    shutil.rmtree(previous_dir)
示例#16
0
def save_isv_projector(algorithm, force=False):
  """Writes the projector file from the UBM and ISV files.

  ``fs.ubm_file`` and ``fs.isv_file`` are loaded into ``algorithm``, which then
  saves the projector to ``fs.projector_file``.  Nothing is written when
  ``utils.check_file`` accepts the projector file.
  """
  fs = FileSelector.instance()

  if utils.check_file(fs.projector_file, force, 1000):
    logger.info("- Projector '%s' already exists.", fs.projector_file)
    return

  # load both parts, then save them into one file
  algorithm.load_ubm(fs.ubm_file)
  algorithm.load_isv(fs.isv_file)
  logger.info("Writing projector into file %s", fs.projector_file)
  algorithm.save_projector(fs.projector_file)
示例#17
0
def save_isv_projector(algorithm, force=False):
    """Combines the UBM and the ISV file into the projector file.

    ``algorithm`` loads ``fs.ubm_file`` and ``fs.isv_file`` and saves the
    projector to ``fs.projector_file``.  The step is skipped when
    ``utils.check_file`` accepts the projector file.
    """
    fs = FileSelector.instance()
    projector_present = utils.check_file(fs.projector_file, force, 1000)

    if not projector_present:
        # load both parts and write them into one file
        algorithm.load_ubm(fs.ubm_file)
        algorithm.load_isv(fs.isv_file)
        logger.info("Writing projector into file %s", fs.projector_file)
        algorithm.save_projector(fs.projector_file)
    else:
        logger.info("- Projector '%s' already exists.", fs.projector_file)
示例#18
0
def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Runs one M-step of the K-Means training (non-parallel).

  The E-step statistics of ``iteration`` are loaded -- from one file covering
  the whole training list if it exists, otherwise accumulated over the per-job
  files of ``number_of_parallel_jobs`` jobs -- assigned to
  ``algorithm.kmeans_trainer``, and the M-step is applied to the machine of
  ``iteration``.  The result is written to the intermediate file of
  ``iteration + 1``.  This part is skipped when ``utils.check_file`` accepts
  that file.

  Independently of the skip, the new machine file is copied to
  ``fs.kmeans_file`` after the last iteration, and with ``clean`` set and
  ``iteration > 0`` the directory of the intermediate file of ``iteration - 1``
  is removed.
  """
  fs = FileSelector.instance()

  old_machine_file = fs.kmeans_intermediate_file(iteration)
  new_machine_file = fs.kmeans_intermediate_file(iteration+1)

  if not utils.check_file(new_machine_file, force, 1000):
    # the E-step was run on these files
    training_list = fs.training_list('extracted', 'train_projector')
    n_files = len(training_list)

    if os.path.exists(fs.kmeans_stats_file(iteration, 0, n_files)):
      # one file holds the statistics of the whole list
      single_stats_file = fs.kmeans_stats_file(iteration, 0, n_files)
      statistics = _read_stats(single_stats_file)
    else:
      # accumulate the statistics files of all non-empty jobs
      filenames = []
      for job_id in range(1, number_of_parallel_jobs + 1):
        job_indices = tools.indices(training_list, number_of_parallel_jobs, job_id)
        if job_indices[-1] > job_indices[0]:
          filenames.append(fs.kmeans_stats_file(iteration, job_indices[0], job_indices[-1]))
      statistics = _accumulate(filenames)

    # machine of the current iteration, trainer with cleared accumulators
    kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(old_machine_file))
    trainer = algorithm.kmeans_trainer
    trainer.reset_accumulators(kmeans_machine)

    # hand the accumulated statistics to the trainer
    trainer.zeroeth_order_statistics = statistics[0]
    trainer.first_order_statistics = statistics[1]
    trainer.average_min_distance = statistics[3]
    # value reported in the log message below
    error = statistics[3] / statistics[2]

    trainer.m_step(kmeans_machine, None) # data is not used in M-step
    logger.info("UBM training: Performed M step %d with result %f" % (iteration, error))

    # store the updated machine
    bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
    kmeans_machine.save(bob.io.base.HDF5File(new_machine_file, 'w'))
  else:
    logger.info("UBM training: Skipping KMeans M-Step since the file '%s' already exists", new_machine_file)

  # copy the k_means file, when last iteration
  # TODO: implement other stopping criteria
  last_iteration = algorithm.kmeans_training_iterations-1
  if iteration == last_iteration:
    shutil.copy(new_machine_file, fs.kmeans_file)
    logger.info("UBM training: Wrote new KMeans machine '%s'", fs.kmeans_file)

  if clean and iteration > 0:
    obsolete_dir = os.path.dirname(fs.kmeans_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", obsolete_dir)
    shutil.rmtree(obsolete_dir)
示例#19
0
def kmeans_mstep(algorithm, iteration, number_of_parallel_jobs, force=False, clean=False):
  """Applies one M-step of the K-Means training (non-parallel).

  Statistics of ``iteration`` come from a single file spanning the whole
  training list when that file exists; otherwise the per-job files of
  ``number_of_parallel_jobs`` jobs are accumulated.  They are assigned to
  ``algorithm.kmeans_trainer``, the M-step updates the machine of
  ``iteration``, and the result is saved as the intermediate file of
  ``iteration + 1``.  This part is skipped when ``utils.check_file`` accepts
  that file.

  In any case, the new machine file is copied to ``fs.kmeans_file`` after the
  last iteration, and with ``clean`` set and ``iteration > 0`` the directory of
  the intermediate file of ``iteration - 1`` is deleted.
  """
  fs = FileSelector.instance()

  old_machine_file = fs.kmeans_intermediate_file(iteration)
  new_machine_file = fs.kmeans_intermediate_file(iteration+1)

  must_compute = not utils.check_file(new_machine_file, force, 1000)
  if must_compute:
    # files that were processed by the E-step
    training_list = fs.training_list('extracted', 'train_projector')

    if os.path.exists(fs.kmeans_stats_file(iteration, 0, len(training_list))):
      # everything is in one statistics file
      whole_stats_file = fs.kmeans_stats_file(iteration, 0, len(training_list))
      statistics = _read_stats(whole_stats_file)
    else:
      # gather the statistics file of each job with a non-empty range
      filenames = []
      for job in range(number_of_parallel_jobs):
        bounds = tools.indices(training_list, number_of_parallel_jobs, job+1)
        if bounds[-1] > bounds[0]:
          filenames.append(fs.kmeans_stats_file(iteration, bounds[0], bounds[-1]))
      statistics = _accumulate(filenames)

    # load the machine to update and clear the trainer's accumulators
    kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(old_machine_file))
    trainer = algorithm.kmeans_trainer
    trainer.reset_accumulators(kmeans_machine)

    # fill the trainer with the accumulated statistics
    trainer.zeroeth_order_statistics = statistics[0]
    trainer.first_order_statistics = statistics[1]
    trainer.average_min_distance = statistics[3]
    # only used for the log message below
    error = statistics[3] / statistics[2]

    trainer.m_step(kmeans_machine, None) # data is not used in M-step
    logger.info("UBM training: Performed M step %d with result %f" % (iteration, error))

    # write the updated machine
    new_machine_dir = os.path.dirname(new_machine_file)
    bob.io.base.create_directories_safe(new_machine_dir)
    kmeans_machine.save(bob.io.base.HDF5File(new_machine_file, 'w'))
  else:
    logger.info("UBM training: Skipping KMeans M-Step since the file '%s' already exists", new_machine_file)

  # copy the k_means file, when last iteration
  # TODO: implement other stopping criteria
  if iteration == algorithm.kmeans_training_iterations-1:
    shutil.copy(new_machine_file, fs.kmeans_file)
    logger.info("UBM training: Wrote new KMeans machine '%s'", fs.kmeans_file)

  if clean and iteration > 0:
    previous_dir = os.path.dirname(fs.kmeans_intermediate_file(iteration-1))
    logger.info("Removing old intermediate directory '%s'", previous_dir)
    shutil.rmtree(previous_dir)
示例#20
0
def kmeans_estep(algorithm,
                 extractor,
                 iteration,
                 indices,
                 force=False,
                 allow_missing_files=False):
    """Run the K-Means E-step of one parallel job and store its statistics.

    The features of the training files ``indices[0]`` (inclusive) to
    ``indices[1]`` (exclusive) are read through ``extractor`` and passed to
    the E-step of ``algorithm.kmeans_trainer``, using the K-Means machine
    stored for ``iteration``.  The trainer's accumulators are then written to
    the stats file of this iteration and index range.

    An empty range is a no-op.  The work is also skipped when
    ``utils.check_file`` accepts either the stats file or the machine file of
    iteration ``iteration + 1``.
    """
    start, stop = indices[0], indices[1]
    if start >= stop:
        return

    fs = FileSelector.instance()
    stats_file = fs.kmeans_stats_file(iteration, start, stop)
    new_machine_file = fs.kmeans_intermediate_file(iteration + 1)

    # bail out early when the results of this step are already in place
    if utils.check_file(stats_file, force, 1000) or utils.check_file(
            new_machine_file, force, 1000):
        logger.info(
            "UBM training: Skipping KMeans E-Step since the file '%s' or '%s' already exists",
            stats_file, new_machine_file)
        return

    training_list = fs.training_list('extracted', 'train_projector')
    kmeans_machine = bob.learn.em.KMeansMachine(
        bob.io.base.HDF5File(fs.kmeans_intermediate_file(iteration)))

    logger.info("UBM training: KMeans E-Step round %d from range(%d, %d)",
                iteration, *indices)

    # stack the features of all files handled by this job
    data = utils.vstack_features(
        functools.partial(read_feature, extractor),
        (training_list[position] for position in range(start, stop)),
        allow_missing_files=allow_missing_files)

    trainer = algorithm.kmeans_trainer
    trainer.e_step(kmeans_machine, data)

    # the distance is stored scaled by the number of files of this job,
    # together with that number
    nsamples = numpy.array([stop - start], dtype=numpy.float64)
    dist = numpy.array(trainer.average_min_distance)
    statistics = (
        ('zeros', trainer.zeroeth_order_statistics),
        ('first', trainer.first_order_statistics),
        ('dist', dist * nsamples),
        ('nsamples', nsamples),
    )

    bob.io.base.create_directories_safe(os.path.dirname(stats_file))
    hdf5 = bob.io.base.HDF5File(stats_file, 'w')
    for name, values in statistics:
        hdf5.set(name, values)

    logger.info("UBM training: Wrote Stats file '%s'", stats_file)
示例#21
0
def train_lda(algorithm, force=False):
  """Train the LDA projector on the whitened training features, grouped by client.

  The files of the ``'whitened'`` training list are loaded client by client and
  handed to ``algorithm.train_lda``; the resulting ``algorithm.lda`` is saved
  to ``fs.lda_file``.  Nothing is done when ``utils.check_file`` accepts that
  file.
  """
  fs = FileSelector.instance()
  if utils.check_file(fs.lda_file, force, 1000):
    logger.info("- LDA projector '%s' already exists.", fs.lda_file)
    return

  # one list of loaded features per client
  train_files = fs.training_list('whitened', 'train_projector', arrange_by_client = True)
  train_features = [list(map(bob.bio.base.load, client_files)) for client_files in train_files]

  algorithm.train_lda(train_features)

  # store the trained projector
  bob.io.base.create_directories_safe(os.path.dirname(fs.lda_file))
  bob.bio.base.save(algorithm.lda, fs.lda_file)
示例#22
0
def train_whitener(algorithm, force=False):
  """Train the whitening projector on the projected i-vectors of the training set.

  All files of the ``'projected_ivector'`` training list are loaded and handed
  to ``algorithm.train_whitener``; the resulting ``algorithm.whitener`` is
  saved to ``fs.whitener_file``.  Nothing is done when ``utils.check_file``
  accepts that file.
  """
  fs = FileSelector.instance()

  if utils.check_file(fs.whitener_file, force, 1000):
    logger.info("- Whitening projector '%s' already exists.", fs.whitener_file)
    return

  # load every training i-vector
  train_files = fs.training_list('projected_ivector', 'train_projector')
  train_features = list(map(bob.bio.base.load, train_files))

  algorithm.train_whitener(train_features)

  # store the trained projector
  bob.io.base.create_directories_safe(os.path.dirname(fs.whitener_file))
  bob.bio.base.save(algorithm.whitener, fs.whitener_file)
示例#23
0
def isv_estep(algorithm, iteration, indices, force=False):
    """Run the ISV E-step of one parallel job and store its accumulators.

    The GMM statistics of the training entries ``indices[0]`` (inclusive) to
    ``indices[1]`` (exclusive) are passed to the E-step of
    ``algorithm.isv_trainer``.  The resulting ``acc_u_a1`` and ``acc_u_a2``
    accumulators are written to the ISV stats file of this iteration and
    index range.  The step is skipped when ``utils.check_file`` accepts that
    file.
    """
    fs = FileSelector.instance()
    first, last = indices[0], indices[1]
    stats_file = fs.isv_stats_file(iteration, first, last)

    if utils.check_file(stats_file, force, 1000):
        logger.info(
            "ISV training: Skipping ISV E-Step since the file '%s' already exists",
            stats_file)
        return

    logger.info("ISV training: E-Step from range(%d, %d)", *indices)

    # the UBM is required to build (or complete) the ISV base below
    algorithm.load_ubm(fs.ubm_file)
    trainer = algorithm.isv_trainer

    # GMM statistics of this job; the training list is arranged by client
    training_list = fs.training_list('projected_gmm',
                                     'train_projector',
                                     arrange_by_client=True)
    data = [
        algorithm.read_gmm_stats(training_list[position])
        for position in range(first, last)
    ]

    if not iteration:
        # first round: start from a fresh ISV base
        isv_base = bob.learn.em.ISVBase(algorithm.ubm,
                                        algorithm.subspace_dimension_of_u)
    else:
        # later rounds: continue from the ISV base stored for this iteration
        isv_base = bob.learn.em.ISVBase(
            bob.io.base.HDF5File(fs.isv_intermediate_file(iteration)))
        isv_base.ubm = algorithm.ubm

    # initialize() is only called to reset the accumulators
    trainer.initialize(isv_base, data, rng=algorithm.rng)
    trainer.e_step(isv_base, data)

    # store the accumulators of this job
    bob.io.base.create_directories_safe(os.path.dirname(stats_file))
    hdf5 = bob.io.base.HDF5File(stats_file, 'w')
    for name in ('acc_u_a1', 'acc_u_a2'):
        hdf5.set(name, getattr(trainer, name))
    logger.info("ISV training: Wrote Stats file '%s'", stats_file)
示例#24
0
def save_projector(algorithm, force=False):
  """Bundle the individually trained models into the single projector file.

  The UBM, TV and whitener files are always loaded into ``algorithm``; the
  LDA, WCCN and PLDA files only when the matching ``algorithm.use_*`` flag is
  set.  The combined projector is then written to ``fs.projector_file``.
  Nothing is done when ``utils.check_file`` accepts that file.
  """
  fs = FileSelector.instance()
  if utils.check_file(fs.projector_file, force, 1000):
    logger.info("- Projector '%s' already exists.", fs.projector_file)
    return

  # mandatory parts
  algorithm.load_ubm(fs.ubm_file)
  algorithm.load_tv(fs.tv_file)
  algorithm.load_whitener(fs.whitener_file)

  # optional parts, depending on the algorithm configuration
  if algorithm.use_lda:
    algorithm.load_lda(fs.lda_file)
  if algorithm.use_wccn:
    algorithm.load_wccn(fs.wccn_file)
  if algorithm.use_plda:
    algorithm.load_plda(fs.plda_file)

  logger.info("Writing projector into file %s", fs.projector_file)
  algorithm.save_projector(fs.projector_file)
示例#25
0
def gmm_estep(algorithm,
              extractor,
              iteration,
              indices,
              force=False,
              allow_missing_files=False):
    """Run the GMM E-step of one parallel job and store its statistics.

    The features of the training files ``indices[0]`` (inclusive) to
    ``indices[1]`` (exclusive) are read through ``extractor`` and passed to
    the E-step of ``algorithm.ubm_trainer``, using the GMM machine stored for
    ``iteration``.  The trainer's ``gmm_statistics`` are saved to the stats
    file of this iteration and index range.

    An empty range is a no-op.  The work is also skipped when
    ``utils.check_file`` accepts either the stats file or the machine file of
    iteration ``iteration + 1``.
    """
    start, stop = indices[0], indices[1]
    if start >= stop:
        return

    fs = FileSelector.instance()
    stats_file = fs.gmm_stats_file(iteration, start, stop)
    new_machine_file = fs.gmm_intermediate_file(iteration + 1)

    # bail out early when the results of this step are already in place
    if utils.check_file(stats_file, force, 1000) or utils.check_file(
            new_machine_file, force, 1000):
        logger.info(
            "UBM training: Skipping GMM E-Step since the file '%s' or '%s' already exists",
            stats_file, new_machine_file)
        return

    training_list = fs.training_list('extracted', 'train_projector')
    gmm_machine = bob.learn.em.GMMMachine(
        bob.io.base.HDF5File(fs.gmm_intermediate_file(iteration)))

    logger.info("UBM training: GMM E-Step from range(%d, %d)", *indices)

    # stack the features of all files handled by this job
    data = utils.vstack_features(
        functools.partial(read_feature, extractor),
        (training_list[position] for position in range(start, stop)),
        allow_missing_files=allow_missing_files)

    # initialize without data, then accumulate the statistics
    trainer = algorithm.ubm_trainer
    trainer.initialize(gmm_machine, None)
    trainer.e_step(gmm_machine, data)
    gmm_stats = trainer.gmm_statistics

    # store the statistics of this job
    bob.io.base.create_directories_safe(os.path.dirname(stats_file))
    gmm_stats.save(bob.io.base.HDF5File(stats_file, 'w'))
    logger.info("UBM training: Wrote GMM stats '%s'", stats_file)
示例#26
0
def ivector_estep(algorithm, iteration, indices, force=False):
  """Run the IVector E-step of one parallel job and store its accumulators.

  The GMM statistics of the training files ``indices[0]`` (inclusive) to
  ``indices[1]`` (exclusive) are passed to the E-step of
  ``algorithm.ivector_trainer``.  The trainer's four accumulators and the
  number of processed files are written to the IVector stats file of this
  iteration and index range.  The step is skipped when ``utils.check_file``
  accepts that file.
  """
  fs = FileSelector.instance()
  stats_file = fs.ivector_stats_file(iteration, indices[0], indices[1])

  if utils.check_file(stats_file, force, 1000):
    logger.info("IVector training: Skipping IVector E-Step since the file '%s' already exists", stats_file)
    return

  logger.info("IVector training: E-Step from range(%d, %d)", *indices)

  # the UBM is required to build (or complete) the TV machine below
  algorithm.load_ubm(fs.ubm_file)
  trainer = algorithm.ivector_trainer

  if not iteration:
    # first round: create a fresh TV machine and initialize the trainer with it
    tv = bob.learn.em.IVectorMachine(algorithm.ubm, algorithm.subspace_dimension_of_t, algorithm.variance_threshold)
    trainer.initialize(tv)
  else:
    # later rounds: continue from the TV machine stored for this iteration
    tv = bob.learn.em.IVectorMachine(bob.io.base.HDF5File(fs.ivector_intermediate_file(iteration)))
    tv.ubm = algorithm.ubm

  # GMM statistics of this job
  training_list = fs.training_list('projected_gmm', 'train_projector')
  data = [algorithm.read_gmm_stats(training_list[position]) for position in range(indices[0], indices[1])]

  trainer.e_step(tv, data)

  # store the accumulators under the name of the trainer attribute
  bob.io.base.create_directories_safe(os.path.dirname(stats_file))
  hdf5 = bob.io.base.HDF5File(stats_file, 'w')
  for name in ('acc_nij_wij2', 'acc_fnormij_wij', 'acc_nij', 'acc_snormij'):
    hdf5.set(name, getattr(trainer, name))
  hdf5.set('nsamples', indices[1] - indices[0])
  logger.info("IVector training: Wrote Stats file '%s'", stats_file)
示例#27
0
def train_isv(algorithm, force=False):
  """Train the ISV projector/enroller from the UBM and the projected GMM statistics.

  The UBM is loaded from ``fs.ubm_file``, the GMM statistics of the
  ``'projected_gmm'`` training list are read client by client and handed to
  ``algorithm.train_isv``; the projector is then saved to
  ``fs.projector_file``.  Nothing is done when ``utils.check_file`` accepts
  that file.
  """
  fs = FileSelector.instance()

  if utils.check_file(fs.projector_file, force, 800):
    logger.info("ISV training: Skipping ISV training since '%s' already exists", fs.projector_file)
    return

  algorithm.load_ubm(fs.ubm_file)

  # one list of GMM statistics per client
  training_list = fs.training_list('projected_gmm', 'train_projector', arrange_by_client = True)
  train_gmm_stats = [list(map(algorithm.read_gmm_stats, client_files)) for client_files in training_list]

  logger.info("ISV training: training ISV with %d clients", len(train_gmm_stats))
  algorithm.train_isv(train_gmm_stats)

  # store the trained projector
  bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file))
  algorithm.save_projector(fs.projector_file)
示例#28
0
def whitening_project(algorithm, indices, force=False):
  """Whiten the training i-vectors ``indices[0]`` (inclusive) to ``indices[1]`` (exclusive).

  Each i-vector of the ``'projected_ivector'`` training list is read,
  projected with ``algorithm.project_whitening`` and saved to the file at the
  same position of the ``'whitened'`` training list.  Files accepted by
  ``utils.check_file`` are left alone.
  """
  fs = FileSelector.instance()
  algorithm.load_whitener(fs.whitener_file)

  ivector_files = fs.training_list('projected_ivector', 'train_projector')
  whitened_files = fs.training_list('whitened', 'train_projector')

  logger.info("IVector training: whitening ivectors range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories['projected_ivector'], fs.directories['whitened'])

  for position in range(indices[0], indices[1]):
    source, target = ivector_files[position], whitened_files[position]
    if utils.check_file(target, force):
      continue
    # read, whiten and store one i-vector
    whitened = algorithm.project_whitening(algorithm.read_feature(source))
    bob.io.base.create_directories_safe(os.path.dirname(target))
    bob.bio.base.save(whitened, target)
示例#29
0
def kmeans_initialize(algorithm, extractor, limit_data = None, force = False):
  """Create and store the initial K-Means machine (non-parallel).

  The features of the ``'extracted'`` training list, reduced by
  ``utils.selected_elements`` according to ``limit_data``, are stacked and
  used to initialize a K-Means machine with ``algorithm.gaussians`` means via
  ``algorithm.kmeans_trainer``.  The machine is saved as the intermediate
  K-Means file of iteration 0.  Nothing is done when ``utils.check_file``
  accepts that file.
  """
  fs = FileSelector.instance()
  output_file = fs.kmeans_intermediate_file(0)

  if utils.check_file(output_file, force, 1000):
    logger.info("UBM training: Skipping KMeans initialization since the file '%s' already exists", output_file)
    return

  logger.info("UBM training: initializing kmeans")

  # stack the features of the selected training files
  all_files = fs.training_list('extracted', 'train_projector')
  training_list = utils.selected_elements(all_files, limit_data)
  features = [read_feature(extractor, feature_file) for feature_file in training_list]
  data = numpy.vstack(features)

  # the machine gets one mean per Gaussian, with the dimension of the features
  feature_dimension = data.shape[1]
  kmeans_machine = bob.learn.em.KMeansMachine(algorithm.gaussians, feature_dimension)
  algorithm.kmeans_trainer.initialize(kmeans_machine, data, algorithm.rng)

  bob.io.base.create_directories_safe(os.path.dirname(output_file))
  kmeans_machine.save(bob.io.base.HDF5File(output_file, 'w'))
  logger.info("UBM training: saved initial KMeans machine to '%s'", output_file)
示例#30
0
def lda_project(algorithm, indices, force=False):
  """LDA-project the whitened training i-vectors ``indices[0]`` (inclusive) to ``indices[1]`` (exclusive).

  Each file of the ``'whitened'`` training list is read, projected with
  ``algorithm.project_lda`` and saved to the file at the same position of the
  ``'lda_projected'`` training list.  Files accepted by ``utils.check_file``
  are left alone.
  """
  fs = FileSelector.instance()
  algorithm.load_lda(fs.lda_file)

  whitened_files = fs.training_list('whitened', 'train_projector')
  lda_projected_files = fs.training_list('lda_projected', 'train_projector')

  logger.info("IVector training: LDA projection range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories['whitened'], fs.directories['lda_projected'])

  for position in range(indices[0], indices[1]):
    source, target = whitened_files[position], lda_projected_files[position]
    if utils.check_file(target, force):
      continue
    # read, project and store one i-vector
    lda_projected = algorithm.project_lda(algorithm.read_feature(source))
    bob.io.base.create_directories_safe(os.path.dirname(target))
    bob.bio.base.save(lda_projected, target)
示例#31
0
def kmeans_estep(algorithm, extractor, iteration, indices, force=False):
  """Run the K-Means E-step of one parallel job and store its statistics.

  The features of the training files ``indices[0]`` (inclusive) to
  ``indices[1]`` (exclusive) are read through ``extractor`` and passed to the
  E-step of ``algorithm.kmeans_trainer``, using the K-Means machine stored for
  ``iteration``.  The trainer's accumulators are then written to the stats
  file of this iteration and index range.

  An empty range is a no-op.  The work is also skipped when
  ``utils.check_file`` accepts either the stats file or the machine file of
  iteration ``iteration + 1``.
  """
  start, stop = indices[0], indices[1]
  if start >= stop:
    return

  fs = FileSelector.instance()
  stats_file = fs.kmeans_stats_file(iteration, start, stop)
  new_machine_file = fs.kmeans_intermediate_file(iteration + 1)

  # bail out early when the results of this step are already in place
  if utils.check_file(stats_file, force, 1000) or utils.check_file(new_machine_file, force, 1000):
    logger.info("UBM training: Skipping KMeans E-Step since the file '%s' or '%s' already exists", stats_file, new_machine_file)
    return

  training_list = fs.training_list('extracted', 'train_projector')
  kmeans_machine = bob.learn.em.KMeansMachine(bob.io.base.HDF5File(fs.kmeans_intermediate_file(iteration)))

  logger.info("UBM training: KMeans E-Step round %d from range(%d, %d)", iteration, *indices)

  # stack the features of all files handled by this job
  features = [read_feature(extractor, training_list[position]) for position in range(start, stop)]
  data = numpy.vstack(features)

  trainer = algorithm.kmeans_trainer
  trainer.e_step(kmeans_machine, data)

  # the distance is stored scaled by the number of files of this job,
  # together with that number
  nsamples = numpy.array([stop - start], dtype=numpy.float64)
  dist = numpy.array(trainer.average_min_distance)
  statistics = (
    ('zeros', trainer.zeroeth_order_statistics),
    ('first', trainer.first_order_statistics),
    ('dist', dist * nsamples),
    ('nsamples', nsamples),
  )

  bob.io.base.create_directories_safe(os.path.dirname(stats_file))
  hdf5 = bob.io.base.HDF5File(stats_file, 'w')
  for name, values in statistics:
    hdf5.set(name, values)

  logger.info("UBM training: Wrote Stats file '%s'", stats_file)
示例#32
0
def isv_estep(algorithm, iteration, indices, force=False):
  """Run the ISV E-step of one parallel job and store its accumulators.

  The GMM statistics of the training entries ``indices[0]`` (inclusive) to
  ``indices[1]`` (exclusive) are passed to the E-step of
  ``algorithm.isv_trainer``.  The resulting ``acc_u_a1`` and ``acc_u_a2``
  accumulators are written to the ISV stats file of this iteration and index
  range.  The step is skipped when ``utils.check_file`` accepts that file.
  """
  fs = FileSelector.instance()
  first, last = indices[0], indices[1]
  stats_file = fs.isv_stats_file(iteration, first, last)

  if utils.check_file(stats_file, force, 1000):
    logger.info("ISV training: Skipping ISV E-Step since the file '%s' already exists", stats_file)
    return

  logger.info("ISV training: E-Step from range(%d, %d)", *indices)

  # the UBM is required to build (or complete) the ISV base below
  algorithm.load_ubm(fs.ubm_file)
  trainer = algorithm.isv_trainer

  # GMM statistics of this job; the training list is arranged by client
  training_list = fs.training_list('projected_gmm', 'train_projector', arrange_by_client=True)
  data = [algorithm.read_gmm_stats(training_list[position]) for position in range(first, last)]

  if not iteration:
    # first round: start from a fresh ISV base
    isv_base = bob.learn.em.ISVBase(algorithm.ubm, algorithm.subspace_dimension_of_u)
  else:
    # later rounds: continue from the ISV base stored for this iteration
    isv_base = bob.learn.em.ISVBase(bob.io.base.HDF5File(fs.isv_intermediate_file(iteration)))
    isv_base.ubm = algorithm.ubm

  # initialize() is only called to reset the accumulators
  trainer.initialize(isv_base, data, rng = algorithm.rng)
  trainer.e_step(isv_base, data)

  # store the accumulators of this job
  bob.io.base.create_directories_safe(os.path.dirname(stats_file))
  hdf5 = bob.io.base.HDF5File(stats_file, 'w')
  for name in ('acc_u_a1', 'acc_u_a2'):
    hdf5.set(name, getattr(trainer, name))
  logger.info("ISV training: Wrote Stats file '%s'", stats_file)
示例#33
0
def ivector_project(algorithm, indices, force=False):
  """Project the training GMM statistics ``indices[0]`` (inclusive) to ``indices[1]`` (exclusive) to i-vectors.

  The UBM and the TV matrix are loaded into ``algorithm`` first.  Each file of
  the ``'projected_gmm'`` training list is then read, projected with
  ``algorithm.project_ivector`` and saved to the file at the same position of
  the ``'projected_ivector'`` training list.  Files accepted by
  ``utils.check_file`` are left alone.
  """
  fs = FileSelector.instance()
  algorithm.load_ubm(fs.ubm_file)
  algorithm.load_tv(fs.tv_file)

  gmm_stats_files = fs.training_list('projected_gmm', 'train_projector')
  ivector_files = fs.training_list('projected_ivector', 'train_projector')

  logger.info("IVector training: Project features range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories['projected_gmm'], fs.directories['projected_ivector'])

  for position in range(indices[0], indices[1]):
    source, target = gmm_stats_files[position], ivector_files[position]
    if utils.check_file(target, force):
      continue
    # read the statistics, project them and store the i-vector
    projected = algorithm.project_ivector(algorithm.read_gmm_stats(source))
    bob.io.base.create_directories_safe(os.path.dirname(target))
    bob.bio.base.save(projected, target)
示例#34
0
def gmm_project(algorithm, extractor, indices, force=False):
  """Project the extracted training features ``indices[0]`` (inclusive) to ``indices[1]`` (exclusive) with the UBM.

  The UBM is loaded into ``algorithm`` first.  Each file of the ``'extracted'``
  training list is then read through ``extractor``, projected with
  ``algorithm.project_ubm`` and saved to the file at the same position of the
  ``'projected_gmm'`` training list.  Files accepted by ``utils.check_file``
  are left alone.
  """
  fs = FileSelector.instance()
  algorithm.load_ubm(fs.ubm_file)

  feature_files = fs.training_list('extracted', 'train_projector')
  projected_files = fs.training_list('projected_gmm', 'train_projector')

  logger.info("ISV training: Project features range (%d, %d) from '%s' to '%s'", indices[0], indices[1], fs.directories['extracted'], fs.directories['projected_gmm'])

  for position in range(indices[0], indices[1]):
    source, target = feature_files[position], projected_files[position]
    if utils.check_file(target, force):
      continue
    # read the feature, project it and store the result
    projected = algorithm.project_ubm(read_feature(extractor, source))
    bob.io.base.create_directories_safe(os.path.dirname(target))
    bob.bio.base.save(projected, target)
示例#35
0
def isv_mstep(algorithm,
              iteration,
              number_of_parallel_jobs,
              force=False,
              clean=False):
    """Performs a single M-step of the ISV algorithm (non-parallel).

    The E-step accumulators of ``iteration`` are collected, either from one
    stats file covering the whole training list (if it exists) or from the
    stats files of the ``number_of_parallel_jobs`` jobs.  They are assigned
    to ``algorithm.isv_trainer``, the M-step is run, and the ISV base of
    iteration ``iteration + 1`` is saved.  This part is skipped when
    ``utils.check_file`` accepts that file.

    After the last iteration (``algorithm.isv_training_iterations - 1``) the
    new ISV base is copied to ``fs.isv_file``.  With ``clean`` set and
    ``iteration > 0``, the intermediate directory of iteration
    ``iteration - 1`` is removed if it still exists.
    """
    fs = FileSelector.instance()

    old_machine_file = fs.isv_intermediate_file(iteration)
    new_machine_file = fs.isv_intermediate_file(iteration + 1)

    if utils.check_file(new_machine_file, force, 1000):
        logger.info(
            "ISV training: Skipping ISV M-Step since the file '%s' already exists",
            new_machine_file)
    else:
        # get the files from e-step
        training_list = fs.training_list('projected_gmm',
                                         'train_projector',
                                         arrange_by_client=True)

        # try if there is one file containing all data
        single_stats_file = fs.isv_stats_file(iteration, 0,
                                              len(training_list))
        if os.path.exists(single_stats_file):
            statistics = _read_stats(single_stats_file)
        else:
            # collect the stats files written by the parallel E-step jobs
            stats_files = []
            for job in range(number_of_parallel_jobs):
                job_indices = tools.indices(training_list,
                                            number_of_parallel_jobs, job + 1)
                if job_indices[-1] >= job_indices[0]:
                    stats_files.append(
                        fs.isv_stats_file(iteration, job_indices[0],
                                          job_indices[-1]))
            # read all stats files
            statistics = _accumulate(stats_files)

        # Load machine: continue from the stored ISV base, or create a new
        # one in the first iteration
        algorithm.load_ubm(fs.ubm_file)
        if iteration:
            isv_base = bob.learn.em.ISVBase(
                bob.io.base.HDF5File(old_machine_file))
            isv_base.ubm = algorithm.ubm
        else:
            isv_base = bob.learn.em.ISVBase(algorithm.ubm,
                                            algorithm.subspace_dimension_of_u)

        # Fetch the ISV trainer and hand it the accumulated statistics
        trainer = algorithm.isv_trainer
        data = [algorithm.read_gmm_stats(training_list[0])
                ]  # Loading data just to allocate memory
        trainer.initialize(isv_base, data)  # Just to allocate memory
        trainer.acc_u_a1 = statistics[0]
        trainer.acc_u_a2 = statistics[1]
        trainer.m_step(isv_base)  # data is not used in M-step
        logger.info("ISV training: Performed M step %d", iteration)

        # Save the ISV model
        bob.io.base.create_directories_safe(os.path.dirname(new_machine_file))
        isv_base.save(bob.io.base.HDF5File(new_machine_file, 'w'))
        logger.info("ISV training: Wrote new ISV Base '%s'", new_machine_file)

    # copy the ISV base to its final location after the last iteration
    if iteration == algorithm.isv_training_iterations - 1:
        shutil.copy(new_machine_file, fs.isv_file)
        logger.info("ISV training: Wrote final ISV Base '%s'", fs.isv_file)

    if clean and iteration > 0:
        old_dir = os.path.dirname(fs.isv_intermediate_file(iteration - 1))
        # When an already finished step is re-run, the directory was removed
        # by the first run; rmtree would raise on the missing path.
        if os.path.isdir(old_dir):
            logger.info("Removing old intermediate directory '%s'", old_dir)
            shutil.rmtree(old_dir)
示例#36
0
    def train_enroller(self, train_features, enroller_file, metadata=None):
        """Train the UBM and store it with the MAP-adapted training supervectors.

        The UBM is trained on all features of ``train_features`` stacked
        together.  Every training feature is then passed to
        ``self.enroll_gmm`` and the mean supervector of the result is
        collected per client.  The UBM and the per-client supervector arrays
        are written to the HDF5 file ``enroller_file``.

        ``train_features`` is iterated as a sequence of clients, each being a
        sequence of features.  ``metadata`` is not used in the code shown
        here.
        """

        ######################################
        # HACK / TODO: the next lines break the isolation concept implemented
        # in bob.bio.base by introducing database knowledge inside of the
        # algorithm: the training files are queried from the database through
        # the FileSelector only to obtain the client ids used as dataset
        # names when the supervectors are saved below.
        fs = FileSelector.instance()
        #train_files = fs.training_objects('extracted', 'train_projector', arrange_by_client = True)
        train_files = fs.database.training_files('train_projector', True)

        #####

        # Stack the features of all clients into one array.
        # TODO: this is sub-optimal
        train_features_flatten = numpy.vstack(
            [feature for client in train_features for feature in client])

        # Train the UBM (the result is available as self.ubm)
        self.train_ubm(train_features_flatten)

        # Second part of the hack: a supervector is computed for every
        # training feature of every client.

        # Set up the MAP trainer on the UBM; only the means are updated
        self.enroll_trainer = bob.learn.em.MAP_GMMTrainer(
            self.ubm,
            relevance_factor=self.relevance_factor,
            update_means=True,
            update_variances=False)

        # Pre-allocate one (number of features x supervector length) array
        # per client
        mean_supervectors = []
        for client in train_features:
            shape = (len(client), self.ubm.mean_supervector.shape[0])
            mean_supervectors.append(numpy.zeros(shape))

        # Compute the supervectors; i indexes the client, j the feature
        for client, i in zip(train_features, range(len(train_features))):
            for feature, j in zip(client, range(len(client))):
                # Run the MAP adaptation for this feature
                map_feature = self.enroll_gmm(feature)
                mean_supervectors[i][j] = map_feature.mean_supervector

        # The enroller file consists of the UBM and all training supervectors

        # Save the UBM in the "/UBM" group
        hdf5 = bob.io.base.HDF5File(enroller_file, "w")
        hdf5.create_group("/UBM")
        hdf5.cd("/UBM")
        self.ubm.save(hdf5)

        # Save the supervectors in the "/train_supervectors" group
        hdf5.create_group("/train_supervectors")
        hdf5.cd("/train_supervectors")
        for i in range(len(mean_supervectors)):
            # The client id of the first file of client i names the dataset,
            # so it can be used during enrollment.
            # NOTE(review): assumes train_files is ordered like train_features - confirm
            class_id = train_files[i][0].client_id
            hdf5.set("{0}".format(class_id), mean_supervectors[i])