Example #1
    def kmeans_initialize(self, force=False):
        """Initializes the K-Means training (non-parallel)."""
        output_file = self.m_configuration.kmeans_intermediate_file % 0

        if self.m_tool_chain.__check_file__(output_file, force, 1000):
            utils.info(
                "UBM training: Skipping KMeans initialization since the file '%s' already exists"
                % output_file)
        else:
            # read data
            utils.info("UBM training: initializing kmeans")
            training_list = self.m_file_selector.training_feature_list()
            data = numpy.vstack([
                bob.io.load(str(training_list[index]))
                for index in utils.quasi_random_indices(
                    len(training_list), self.m_args.limit_training_examples)
            ])

            # Perform KMeans initialization
            kmeans_machine = bob.machine.KMeansMachine(self.m_tool.m_gaussians,
                                                       data.shape[1])
            # Creates the KMeansTrainer and calls the initialization procedure
            kmeans_trainer = bob.trainer.KMeansTrainer()
            kmeans_trainer.initialize(kmeans_machine, data)
            utils.ensure_dir(os.path.dirname(output_file))
            kmeans_machine.save(bob.io.HDF5File(output_file, 'w'))
            utils.info("UBM training: saved initial KMeans machine to '%s'" %
                       output_file)
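
Note: the helper utils.quasi_random_indices used above is referenced but not shown in these examples. A minimal sketch of what it plausibly does, assuming it returns an evenly spread selection of indices capped at a desired count (the actual helper may differ):

def quasi_random_indices(number_of_total_items, number_of_desired_items=None):
    """Returns an evenly spread selection of indices (a sketch, not the real helper)."""
    # keep all indices if no (restrictive) limit is given
    if number_of_desired_items is None or number_of_desired_items >= number_of_total_items:
        return range(number_of_total_items)
    # otherwise pick indices at regular fractional steps through the full range
    increase = float(number_of_total_items) / float(number_of_desired_items)
    return [int((i + 0.5) * increase) for i in range(number_of_desired_items)]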
Example #2
def gmm(data):
    """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
	 This might require a lot of memory."""
    output_file = GMM_HDF5
    print "UBM Training - Step 2: Initializing GMM...."

    # load KMeans machine
    kmeans_machine = bob.machine.KMeansMachine(bob.io.HDF5File(KMeans_HDF5))

    # Create initial GMM Machine
    gmm_machine = bob.machine.GMMMachine(gaussians, data.shape[1])

    variances, weights = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

    # Initializes the GMM
    gmm_machine.means = kmeans_machine.means
    gmm_machine.variances = variances
    gmm_machine.weights = weights
    gmm_machine.set_variance_thresholds(variance_threshold)

    # Creates the GMMTrainer and trains the GMM
    gmm_trainer = bob.trainer.ML_GMMTrainer(True, True, True)
    gmm_trainer.max_iterations = max_iterations
    gmm_trainer.rng = bob.core.random.mt19937(INIT_SEED)

    gmm_trainer.train(gmm_machine, data)
    utils.ensure_dir(os.path.dirname(output_file))
    gmm_machine.save(bob.io.HDF5File(output_file, 'w'))
    print "UBM Training - Step 2: Wrote GMM file '%s'" % output_file
Example #3
    def extract_features(self,
                         extractor,
                         preprocessor,
                         indices=None,
                         force=False):
        """Extracts the features from the preprocessed data using the given extractor."""
        extractor.load(str(self.m_file_selector.extractor_file))
        data_files = self.m_file_selector.preprocessed_data_list()
        feature_files = self.m_file_selector.feature_list()

        # select a subset of indices to iterate
        if indices is not None:
            index_range = range(indices[0], indices[1])
            utils.info("- Extraction: splitting of index range %s" %
                       str(indices))
        else:
            index_range = range(len(data_files))

        utils.ensure_dir(self.m_file_selector.features_directory)
        utils.info(
            "- Extraction: extracting %d features from directory '%s' to directory '%s'"
            % (len(index_range), self.m_file_selector.preprocessed_directory,
               self.m_file_selector.features_directory))
        for i in index_range:
            data_file = data_files[i]
            feature_file = feature_files[i]

            if not self.__check_file__(feature_file, force):
                # load data
                data = preprocessor.read_data(str(data_file))
                # extract feature
                feature = extractor(data, data_file)
                # Save feature
                utils.ensure_dir(os.path.dirname(feature_file))
                extractor.save_feature(feature, str(feature_file))
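
Note: every method in these examples guards its work with __check_file__, which is not shown. A sketch of its likely behavior, inferred from the call sites (it returns True when a sufficiently large output already exists and force is not set; the real implementation may differ):

def __check_file__(self, filename, force, expected_file_size=1):
    """Returns True if the file exists, is large enough, and force is not set;
    otherwise removes any stale file and returns False (a sketch)."""
    if os.path.exists(filename):
        if force or os.path.getsize(filename) < expected_file_size:
            os.remove(filename)  # stale or forced: recompute
            return False
        return True
    return False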
Example #4
    def project_features(self, tool, extractor, indices=None, force=False):
        """Projects the features for all files of the database."""
        # load the projector file
        if tool.performs_projection:
            tool.load_projector(str(self.m_file_selector.projector_file))

            feature_files = self.m_file_selector.feature_list()
            projected_files = self.m_file_selector.projected_list()

            # select a subset of indices to iterate
            if indices is not None:
                index_range = range(indices[0], indices[1])
                utils.info("- Projection: splitting of index range %s" %
                           str(indices))
            else:
                index_range = range(len(feature_files))

            utils.ensure_dir(self.m_file_selector.projected_directory)
            utils.info(
                "- Projection: projecting %d features from directory '%s' to directory '%s'"
                % (len(index_range), self.m_file_selector.features_directory,
                   self.m_file_selector.projected_directory))
            # extract the features
            for i in index_range:
                feature_file = feature_files[i]
                projected_file = projected_files[i]

                if not self.__check_file__(projected_file, force):
                    # load feature
                    feature = extractor.read_feature(str(feature_file))
                    # project feature
                    projected = tool.project(feature)
                    # write it
                    utils.ensure_dir(os.path.dirname(projected_file))
                    tool.save_feature(projected, str(projected_file))
Example #5
    def train_projector(self, tool, extractor, force=False):
        """Train the feature projector with the extracted features of the world group."""
        if tool.requires_projector_training:
            projector_file = self.m_file_selector.projector_file

            if self.__check_file__(projector_file, force, 1000):
                utils.info("- Projection: projector '%s' already exists." %
                           projector_file)
            else:
                utils.ensure_dir(os.path.dirname(projector_file))
                # train projector
                if tool.split_training_features_by_client:
                    train_files = self.m_file_selector.training_list(
                        'features', 'train_projector', arrange_by_client=True)
                    train_features = self.__read_features_by_client__(
                        train_files, extractor)
                    utils.info(
                        "- Projection: training projector '%s' using %d identities: "
                        % (projector_file, len(train_files)))
                else:
                    train_files = self.m_file_selector.training_list(
                        'features', 'train_projector')
                    train_features = self.__read_features__(
                        train_files, extractor)
                    utils.info(
                        "- Projection: training projector '%s' using %d training files: "
                        % (projector_file, len(train_files)))

                # perform training
                tool.train_projector(train_features, str(projector_file))
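
Note: the helpers __read_features__ and __read_features_by_client__ used here (and in Example #7) are not shown. A plausible sketch, assuming the flat variant reads each file and the by-client variant preserves the nested per-identity grouping:

def __read_features__(self, files, extractor):
    # files: a flat list of feature file names
    return [extractor.read_feature(str(f)) for f in files]

def __read_features_by_client__(self, files, extractor):
    # files: a list of per-client lists of file names
    return [[extractor.read_feature(str(f)) for f in client_files]
            for client_files in files]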
Example #6
  def kmeans_estep(self, indices, force=False):
    """Performs a single E-step of the K-Means algorithm (parallel)"""
    stats_file = self.m_configuration.kmeans_stats_file % (self.m_args.iteration, indices[0], indices[1])

    if self.m_tool_chain.__check_file__(stats_file, force, 1000):
      utils.info("UBM training: Skipping KMeans E-Step since the file '%s' already exists" % stats_file)
    else:
      training_list = self.m_file_selector.training_feature_list()
      machine_file = self.m_configuration.kmeans_intermediate_file % self.m_args.iteration
      kmeans_machine = bob.machine.KMeansMachine(bob.io.HDF5File(machine_file))

      utils.info("UBM training: KMeans E-Step from range(%d, %d)" % indices)

      # read data
      data = numpy.vstack([bob.io.load(str(training_list[index])) for index in range(indices[0], indices[1])])

      kmeans_trainer = bob.trainer.KMeansTrainer()
      t = bob.machine.KMeansMachine(self.m_tool.m_gaussians, data.shape[1]) # Temporary Kmeans machine required for trainer initialization
      kmeans_trainer.initialize(t, data)

      # Performs the E-step
      kmeans_trainer.e_step(kmeans_machine, data)

      # write results to file
      dist = numpy.array([kmeans_trainer.average_min_distance])
      nsamples = numpy.array([indices[1] - indices[0]], dtype=numpy.float64)

      utils.ensure_dir(os.path.dirname(stats_file))
      f = bob.io.HDF5File(stats_file, 'w')
      f.set('zeros', kmeans_trainer.zeroeth_order_statistics)
      f.set('first', kmeans_trainer.first_order_statistics)
      f.set('dist', dist * nsamples)
      f.set('nsamples', nsamples)
      utils.info("UBM training: Wrote Stats file '%s'" % stats_file)
Example #7
    def train_enroller(self, tool, extractor, force=False):
        """Trains the model enroller using the extracted or projected features, depending on your setup of the base class Tool."""
        reader = tool if tool.use_projected_features_for_enrollment else extractor
        if tool.requires_enroller_training:
            enroller_file = self.m_file_selector.enroller_file

            if self.__check_file__(enroller_file, force, 1000):
                utils.info("- Enrollment: enroller '%s' already exists." %
                           enroller_file)
            else:
                utils.ensure_dir(os.path.dirname(enroller_file))
                # first, load the projector
                tool.load_projector(str(self.m_file_selector.projector_file))
                # training models
                train_files = self.m_file_selector.training_list(
                    'projected' if tool.use_projected_features_for_enrollment
                    else 'features',
                    'train_enroller',
                    arrange_by_client=True)
                train_features = self.__read_features_by_client__(
                    train_files, reader)

                # perform training
                utils.info(
                    "- Enrollment: training enroller '%s' using %d identities: "
                    % (enroller_file, len(train_features)))
                tool.train_enroller(train_features, str(enroller_file))
Example #8
  def gmm_estep(self, indices, force=False):
    """Performs a single E-step of the GMM training (parallel)."""
    stats_file = self.m_configuration.gmm_stats_file % (self.m_args.iteration, indices[0], indices[1])

    if self.m_tool_chain.__check_file__(stats_file, force, 1000):
      utils.info("UBM training: Skipping GMM E-Step since the file '%s' already exists" % stats_file)
    else:
      training_list = self.m_file_selector.training_feature_list()
      machine_file = self.m_configuration.gmm_intermediate_file % self.m_args.iteration
      gmm_machine = bob.machine.GMMMachine(bob.io.HDF5File(machine_file))

      utils.info("UBM training: GMM E-Step from range(%d, %d)" % indices)

      # read data
      data = numpy.vstack([bob.io.load(str(training_list[index])) for index in range(indices[0], indices[1])])

      gmm_trainer = bob.trainer.ML_GMMTrainer(self.m_tool.m_update_means, self.m_tool.m_update_variances, self.m_tool.m_update_weights)
      gmm_trainer.responsibilities_threshold = self.m_tool.m_responsibility_threshold
      gmm_trainer.initialize(gmm_machine, data)

      # Calls the E-step and extracts the GMM statistics
      gmm_trainer.e_step(gmm_machine, data)
      gmm_stats = gmm_trainer.gmm_statistics

      # Saves the GMM statistics to the file
      utils.ensure_dir(os.path.dirname(stats_file))
      gmm_stats.save(bob.io.HDF5File(stats_file, 'w'))
      utils.info("UBM training: Wrote GMM stats '%s'" % (stats_file))
Example #9
    def train_extractor(self, extractor, preprocessor, force=False):
        """Trains the feature extractor using preprocessed data of the 'world' set, if the feature extractor requires training."""
        if extractor.requires_training:
            extractor_file = self.m_file_selector.extractor_file
            if self.__check_file__(extractor_file, force, 1000):
                utils.info("- Extraction: extractor '%s' already exists." %
                           extractor_file)
            else:
                utils.ensure_dir(os.path.dirname(extractor_file))
                # read training files
                if extractor.split_training_data_by_client:
                    train_files = self.m_file_selector.training_list(
                        'preprocessed',
                        'train_extractor',
                        arrange_by_client=True)
                    train_data = self.__read_data_by_client__(
                        train_files, preprocessor)
                    utils.info(
                        "- Extraction: training extractor '%s' using %d identities: "
                        % (extractor_file, len(train_files)))
                else:
                    train_files = self.m_file_selector.training_list(
                        'preprocessed', 'train_extractor')
                    train_data = self.__read_data__(train_files, preprocessor)
                    utils.info(
                        "- Extraction: training extractor '%s' using %d training files: "
                        % (extractor_file, len(train_files)))
                # train model
                extractor.train(train_data, extractor_file, train_files)
Example #10
  def gmm_initialize(self, force=False):
    """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
    This might require a lot of memory."""
    output_file = self.m_configuration.gmm_intermediate_file % 0

    if self.m_tool_chain.__check_file__(output_file, force, 800):
      utils.info("UBM Training: Skipping GMM initialization since '%s' already exists" % output_file)
    else:
      training_list = self.m_file_selector.training_feature_list()
      utils.info("UBM Training: Initializing GMM")

      # load KMeans machine
      kmeans_machine = bob.machine.KMeansMachine(bob.io.HDF5File(self.m_configuration.kmeans_file))

      # read features
      data = numpy.vstack([bob.io.load(str(training_list[index])) for index in utils.quasi_random_indices(len(training_list), self.m_args.limit_training_examples)])

      # Create initial GMM Machine
      gmm_machine = bob.machine.GMMMachine(self.m_tool.m_gaussians, data.shape[1])

      [variances, weights] = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

      # Initializes the GMM
      gmm_machine.means = kmeans_machine.means
      gmm_machine.variances = variances
      gmm_machine.weights = weights
      gmm_machine.set_variance_thresholds(self.m_tool.m_variance_threshold)

      utils.ensure_dir(os.path.dirname(output_file))
      gmm_machine.save(bob.io.HDF5File(output_file, 'w'))
      utils.info("UBM Training: Wrote GMM file '%s'" % output_file)
Example #11
def gmm(data):
	"""Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
	 This might require a lot of memory."""
	output_file = GMM_HDF5
	print "UBM Training - Step 2: Initializing GMM...."

	# load KMeans machine
	kmeans_machine = bob.machine.KMeansMachine(bob.io.HDF5File(KMeans_HDF5))

	# Create initial GMM Machine
	gmm_machine = bob.machine.GMMMachine(gaussians, data.shape[1])

	variances, weights = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

	# Initializes the GMM
	gmm_machine.means = kmeans_machine.means
	gmm_machine.variances = variances
	gmm_machine.weights = weights
	gmm_machine.set_variance_thresholds(variance_threshold)

	# Creates the GMMTrainer and trains the GMM
	gmm_trainer = bob.trainer.ML_GMMTrainer(True, True, True)
	gmm_trainer.max_iterations = max_iterations
	gmm_trainer.rng = bob.core.random.mt19937(INIT_SEED)

	gmm_trainer.train(gmm_machine, data)
	utils.ensure_dir(os.path.dirname(output_file))
	gmm_machine.save(bob.io.HDF5File(output_file, 'w'))
	print "UBM Training - Step 2: Wrote GMM file '%s'" % output_file
Example #12
  def kmeans_mstep(self, counts, force=False):
    """Performs a single M-step of the K-Means algorithm (non-parallel)"""
    old_machine_file = self.m_configuration.kmeans_intermediate_file % self.m_args.iteration
    new_machine_file = self.m_configuration.kmeans_intermediate_file % (self.m_args.iteration + 1)

    if self.m_tool_chain.__check_file__(new_machine_file, force, 1000):
      utils.info("UBM training: Skipping KMeans M-Step since the file '%s' already exists" % new_machine_file)
    else:
      # get the files from e-step
      training_list = self.m_file_selector.training_feature_list()

      # check whether there is a single file containing all the data
      if os.path.exists(self.m_configuration.kmeans_stats_file % (self.m_args.iteration, 0, len(training_list))):
        stats_file = self.m_configuration.kmeans_stats_file % (self.m_args.iteration, 0, len(training_list))
        # load stats file
        zeroeth, first, nsamples, dist = self.read_stats(stats_file)
      else:
        # load several files
        job_ids = range(self.__generate_job_array__(training_list, counts)[1])
        job_indices = [(counts * job_id, min(counts * (job_id+1), len(training_list))) for job_id in job_ids]
        stats_files = [self.m_configuration.kmeans_stats_file % (self.m_args.iteration, indices[0], indices[1]) for indices in job_indices]

        # read all stats files
        zeroeth, first, nsamples, dist = self.read_stats(stats_files[0])
        for stats_file in stats_files[1:]:
          zeroeth_, first_, nsamples_, dist_ = self.read_stats(stats_file)
          zeroeth += zeroeth_
          first += first_
          nsamples += nsamples_
          dist += dist_

      # read one feature file: initialize() needs data of the right shape, but the M-step itself does not use it
      data = numpy.array(bob.io.load(str(training_list[0])))

      # Creates the KMeansTrainer
      kmeans_trainer = bob.trainer.KMeansTrainer()
      # Creates the KMeansMachine
      kmeans_machine = bob.machine.KMeansMachine(bob.io.HDF5File(old_machine_file))
      kmeans_trainer.initialize(kmeans_machine, data)

      kmeans_trainer.zeroeth_order_statistics = zeroeth
      kmeans_trainer.first_order_statistics = first
      kmeans_trainer.average_min_distance = dist

      # Performs the M-step
      kmeans_trainer.m_step(kmeans_machine, data) # data is not used in M-step
      utils.info("UBM training: Performed M step %d with result %f" % (self.m_args.iteration, dist/nsamples))

      # Save the K-Means model
      utils.ensure_dir(os.path.dirname(new_machine_file))
      kmeans_machine.save(bob.io.HDF5File(new_machine_file, 'w'))
      shutil.copy(new_machine_file, self.m_configuration.kmeans_file)
      utils.info("UBM training: Wrote new KMeans machine '%s'" % new_machine_file)

    if self.m_args.clean_intermediate and self.m_args.iteration > 0:
      old_file = self.m_configuration.kmeans_intermediate_file % (self.m_args.iteration-1)
      utils.info("Removing old intermediate directory '%s'" % os.path.dirname(old_file))
      shutil.rmtree(os.path.dirname(old_file))
Example #13
  def gmm_mstep(self, counts, force=False):
    """Performs a single M-step of the GMM training (non-parallel)"""
    old_machine_file = self.m_configuration.gmm_intermediate_file % self.m_args.iteration
    new_machine_file = self.m_configuration.gmm_intermediate_file % (self.m_args.iteration + 1)

    if self.m_tool_chain.__check_file__(new_machine_file, force, 1000):
      utils.info("UBM training: Skipping GMM M-Step since the file '%s' already exists" % new_machine_file)
    else:
      # get the files from e-step
      training_list = self.m_file_selector.training_feature_list()

      # check whether there is a single file containing all the data
      if os.path.exists(self.m_configuration.gmm_stats_file % (self.m_args.iteration, 0, len(training_list))):
        stats_file = self.m_configuration.gmm_stats_file % (self.m_args.iteration, 0, len(training_list))
        # load stats file
        gmm_stats = bob.machine.GMMStats(bob.io.HDF5File(stats_file))
      else:
        # load several files
        job_ids = range(self.__generate_job_array__(training_list, counts)[1])
        job_indices = [(counts * job_id, min(counts * (job_id+1), len(training_list))) for job_id in job_ids]
        stats_files = [self.m_configuration.gmm_stats_file % (self.m_args.iteration, indices[0], indices[1]) for indices in job_indices]

        # read all stats files
        gmm_stats = bob.machine.GMMStats(bob.io.HDF5File(stats_files[0]))
        for stats_file in stats_files[1:]:
          gmm_stats += bob.machine.GMMStats(bob.io.HDF5File(stats_file))

      # read one feature file: initialize() needs data of the right shape, but the M-step itself does not use it
      data = numpy.array(bob.io.load(str(training_list[0])))

      # load the old gmm machine
      gmm_machine = bob.machine.GMMMachine(bob.io.HDF5File(old_machine_file))
      # initialize the trainer
      gmm_trainer = bob.trainer.ML_GMMTrainer(self.m_tool.m_update_means, self.m_tool.m_update_variances, self.m_tool.m_update_weights)
      gmm_trainer.responsibilities_threshold = self.m_tool.m_responsibility_threshold
      gmm_trainer.initialize(gmm_machine, data)
      gmm_trainer.gmm_statistics = gmm_stats

      # Calls M-step
      gmm_trainer.m_step(gmm_machine, data)

      # Saves the GMM statistics to the file
      utils.ensure_dir(os.path.dirname(new_machine_file))
      gmm_machine.save(bob.io.HDF5File(new_machine_file, 'w'))
      # shutil is assumed to be imported at module level (it is also used below)
      shutil.copy(new_machine_file, self.m_configuration.projector_file)

    if self.m_args.clean_intermediate and self.m_args.iteration > 0:
      old_file = self.m_configuration.gmm_intermediate_file % (self.m_args.iteration-1)
      utils.info("Removing old intermediate directory '%s'" % os.path.dirname(old_file))
      shutil.rmtree(os.path.dirname(old_file))
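
Note: taken together, the initialize/E-step/M-step methods form one parallel EM training loop. A minimal sequential driver, purely illustrative (each E-step normally runs as a separate grid job, and setting the iteration via m_args is an assumption):

def train_ubm(trainer, training_list, counts, kmeans_iterations, gmm_iterations):
    """Runs the K-Means and GMM EM loops over chunks of 'counts' training files (a sketch)."""
    def chunks():
        for start in range(0, len(training_list), counts):
            yield (start, min(start + counts, len(training_list)))

    trainer.kmeans_initialize()
    for iteration in range(kmeans_iterations):
        trainer.m_args.iteration = iteration  # assumption: iteration is passed via m_args
        for indices in chunks():
            trainer.kmeans_estep(indices)
        trainer.kmeans_mstep(counts)

    trainer.gmm_initialize()
    for iteration in range(gmm_iterations):
        trainer.m_args.iteration = iteration
        for indices in chunks():
            trainer.gmm_estep(indices)
        trainer.gmm_mstep(counts)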
Example #14
    def save_feature(self, feature, feature_file):
        """Saves the given *extracted* feature to a file with the given name.
    In this base class implementation:

    - If the given feature has a 'save' attribute, it calls feature.save(bob.io.HDF5File(feature_file), 'w').
      In this case, the given feature_file might be either a file name or a bob.io.HDF5File.
    - Otherwise, it uses bob.io.save to do that.

    If you have a different format, please overwrite this function.
    """
        utils.ensure_dir(os.path.dirname(feature_file))
        if hasattr(feature, 'save'):
            # this is some class that supports saving itself
            feature.save(bob.io.HDF5File(feature_file, "w"))
        else:
            bob.io.save(feature, feature_file)
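
Note: the matching read_feature used by Examples #4 and #29 is not shown. A plausible base-class counterpart, assuming the feature was written with bob.io.save (classes with their own format would override it):

def read_feature(self, feature_file):
    """Reads an extracted feature saved by the base-class save_feature (a sketch)."""
    return bob.io.load(str(feature_file))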
Example #15
    def kmeans_estep(self, indices, force=False):
        """Performs a single E-step of the K-Means algorithm (parallel)"""
        stats_file = self.m_configuration.kmeans_stats_file % (
            self.m_args.iteration, indices[0], indices[1])

        if self.m_tool_chain.__check_file__(stats_file, force, 1000):
            utils.info(
                "UBM training: Skipping KMeans E-Step since the file '%s' already exists"
                % stats_file)
        else:
            training_list = self.m_file_selector.training_feature_list()
            machine_file = self.m_configuration.kmeans_intermediate_file % self.m_args.iteration
            kmeans_machine = bob.machine.KMeansMachine(
                bob.io.HDF5File(machine_file))

            utils.info("UBM training: KMeans E-Step from range(%d, %d)" %
                       indices)

            # read data
            data = numpy.vstack([
                bob.io.load(str(training_list[index]))
                for index in range(indices[0], indices[1])
            ])

            kmeans_trainer = bob.trainer.KMeansTrainer()
            t = bob.machine.KMeansMachine(
                self.m_tool.m_gaussians, data.shape[1]
            )  # Temporary Kmeans machine required for trainer initialization
            kmeans_trainer.initialize(t, data)

            # Performs the E-step
            kmeans_trainer.e_step(kmeans_machine, data)

            # write results to file
            dist = numpy.array([kmeans_trainer.average_min_distance])
            nsamples = numpy.array([indices[1] - indices[0]],
                                   dtype=numpy.float64)

            utils.ensure_dir(os.path.dirname(stats_file))
            f = bob.io.HDF5File(stats_file, 'w')
            f.set('zeros', kmeans_trainer.zeroeth_order_statistics)
            f.set('first', kmeans_trainer.first_order_statistics)
            f.set('dist', dist * nsamples)
            f.set('nsamples', nsamples)
            utils.info("UBM training: Wrote Stats file '%s'" % stats_file)
Example #16
def kmeans(data):
    """the K-Means training."""
    # read data
    print "UBM Training - Step 1: initializing kmeans"
    output_file = KMeans_HDF5
    # Perform KMeans initialization
    kmeans_machine = bob.machine.KMeansMachine(gaussians, data.shape[1])
    # Creates the KMeansTrainer and trains the Kmeans
    kmeans_trainer = bob.trainer.KMeansTrainer()
    kmeans_trainer.initialization_method = kmeans_trainer.initialization_method_type.RANDOM_NO_DUPLICATE
    kmeans_trainer.max_iterations = max_iterations
    kmeans_trainer.convergence_threshold = variance_threshold
    kmeans_trainer.rng = bob.core.random.mt19937(INIT_SEED)

    kmeans_trainer.train(kmeans_machine, data)
    utils.ensure_dir(os.path.dirname(output_file))
    kmeans_machine.save(bob.io.HDF5File(output_file, 'w'))
    print "UBM Training - Step 1: Saved KMeans machine to '%s'" % output_file
Example #17
def kmeans(data):
	"""the K-Means training."""
	# read data
	print "UBM Training - Step 1: initializing kmeans"
	output_file = KMeans_HDF5
	# Perform KMeans initialization
	kmeans_machine = bob.machine.KMeansMachine(gaussians, data.shape[1])
	# Creates the KMeansTrainer and trains the Kmeans
	kmeans_trainer = bob.trainer.KMeansTrainer()
	kmeans_trainer.initialization_method = kmeans_trainer.initialization_method_type.RANDOM_NO_DUPLICATE
	kmeans_trainer.max_iterations = max_iterations
	kmeans_trainer.convergence_threshold = variance_threshold
	kmeans_trainer.rng = bob.core.random.mt19937(INIT_SEED)

	kmeans_trainer.train(kmeans_machine, data)
	utils.ensure_dir(os.path.dirname(output_file))
	kmeans_machine.save(bob.io.HDF5File(output_file, 'w'))
	print "UBM Training - Step 1: Saved KMeans machine to '%s'" % output_file
Example #18
    def gmm_initialize(self, force=False):
        """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
    This might require a lot of memory."""
        output_file = self.m_configuration.gmm_intermediate_file % 0

        if self.m_tool_chain.__check_file__(output_file, force, 800):
            utils.info(
                "UBM Training: Skipping GMM initialization since '%s' already exists"
                % output_file)
        else:
            training_list = self.m_file_selector.training_feature_list()
            utils.info("UBM Training: Initializing GMM")

            # load KMeans machine
            kmeans_machine = bob.machine.KMeansMachine(
                bob.io.HDF5File(self.m_configuration.kmeans_file))

            # read features
            data = numpy.vstack([
                bob.io.load(str(training_list[index]))
                for index in utils.quasi_random_indices(
                    len(training_list), self.m_args.limit_training_examples)
            ])

            # Create initial GMM Machine
            gmm_machine = bob.machine.GMMMachine(self.m_tool.m_gaussians,
                                                 data.shape[1])

            variances, weights = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

            # Initializes the GMM
            gmm_machine.means = kmeans_machine.means
            gmm_machine.variances = variances
            gmm_machine.weights = weights
            gmm_machine.set_variance_thresholds(
                self.m_tool.m_variance_threshold)

            utils.ensure_dir(os.path.dirname(output_file))
            gmm_machine.save(bob.io.HDF5File(output_file, 'w'))
            utils.info("UBM Training: Wrote GMM file '%s'" % output_file)
Example #19
  def kmeans_initialize(self, force=False):
    """Initializes the K-Means training (non-parallel)."""
    output_file = self.m_configuration.kmeans_intermediate_file % 0

    if self.m_tool_chain.__check_file__(output_file, force, 1000):
      utils.info("UBM training: Skipping KMeans initialization since the file '%s' already exists" % output_file)
    else:
      # read data
      utils.info("UBM training: initializing kmeans")
      training_list = self.m_file_selector.training_feature_list()
      data = numpy.vstack([bob.io.load(str(training_list[index])) for index in utils.quasi_random_indices(len(training_list), self.m_args.limit_training_examples)])

      # Perform KMeans initialization
      kmeans_machine = bob.machine.KMeansMachine(self.m_tool.m_gaussians, data.shape[1])
      # Creates the KMeansTrainer and calls the initialization procedure
      kmeans_trainer = bob.trainer.KMeansTrainer()
      kmeans_trainer.initialize(kmeans_machine, data)
      utils.ensure_dir(os.path.dirname(output_file))
      kmeans_machine.save(bob.io.HDF5File(output_file, 'w'))
      utils.info("UBM training: saved initial KMeans machine to '%s'" % output_file)
Example #20
  def feature_normalization(self, indices, force=False):
    """Normalizes the list of features to have zero mean and unit variance (parallel)"""
    normalized_list = self.m_file_selector.training_feature_list()

    utils.info("UBM training: normalizing features from range(%d, %d)" % indices)

    # iterate through the files and normalize the features
    for index in range(indices[0], indices[1]):
      feature = bob.io.load(str(normalized_list[index]))

      mean, std = self.m_tool.__normalize_std_array__(feature)

      if self.m_tool_chain.__check_file__(normalized_list[index], force):
        utils.debug("Skipping file '%s'" % normalized_list[index])
      else:
        utils.ensure_dir(os.path.dirname(normalized_list[index]))
        f = bob.io.HDF5File(str(normalized_list[index]), 'w')
        f.set('mean', mean)
        f.set('std', std)
        utils.debug("Saved normalized feature %s" %str(normalized_list[index]))
Example #21
def train_ivector(train_features, input_ubm_file):
  # load UBM
  ubm = bob.machine.GMMMachine(bob.io.HDF5File(input_ubm_file))

  # load GMM stats from training files
  gmm_stats = load_gmm_stats_list(input_ubm_file, train_features)  

  # Training IVector enroller
  output_file = 'model/enroller_ivector.hdf5'

  print "IVector training"
  # Perform IVector initialization
  ivector_machine = bob.machine.IVectorMachine(ubm, subspace_dimension_of_t) 
  ivector_machine.variance_threshold = variance_threshold

  # Creates the IVectorTrainer and trains the ivector machine
  ivector_trainer = bob.trainer.IVectorTrainer(update_sigma=True, convergence_threshold=variance_threshold, max_iterations=max_iterations)
  ivector_trainer.train(ivector_machine, gmm_stats)
  utils.ensure_dir(os.path.dirname(output_file))
  ivector_machine.save(bob.io.HDF5File(output_file, 'w'))
  print "IVector training: saved enroller's IVector machine base to '%s'" % output_file
Example #22
    def preprocess_data(self, preprocessor, indices=None, force=False):
        """Preprocesses the original data with the given preprocessor."""
        # get the file lists
        data_files = self.m_file_selector.original_data_list()
        preprocessed_data_files = self.m_file_selector.preprocessed_data_list()

        # select a subset of keys to iterate
        if indices is not None:
            index_range = range(indices[0], indices[1])
            utils.info("- Preprocessing: splitting of index range %s" %
                       str(indices))
        else:
            index_range = range(len(data_files))

        utils.ensure_dir(self.m_file_selector.preprocessed_directory)
        utils.info(
            "- Preprocessing: processing %d data files from directory '%s' to directory '%s'"
            % (len(index_range),
               self.m_file_selector.m_database.original_directory,
               self.m_file_selector.preprocessed_directory))

        # read annotation files
        annotation_list = self.m_file_selector.annotation_list()

        for i in index_range:
            preprocessed_data_file = preprocessed_data_files[i]

            if not self.__check_file__(preprocessed_data_file, force):
                data = preprocessor.read_original_data(str(data_files[i]))

                # get the annotations; might be None
                annotations = self.m_file_selector.get_annotations(
                    annotation_list[i])

                # call the preprocessor
                preprocessed_data = preprocessor(data, annotations)

                utils.ensure_dir(os.path.dirname(preprocessed_data_file))
                preprocessor.save_data(preprocessed_data,
                                       str(preprocessed_data_file))
Example #23
    def save_feature(self, feature, feature_file):
        """Save extracted SIFT features separated into keypoints and descriptors"""
        utils.ensure_dir(os.path.dirname(feature_file))

        l_key = 4  # Length of SIFT keypoint.
        l_desc = 128  # Length of the SIFT descriptors.
        l_feat = len(feature)  # Length of feature.

        sift_keypoints = numpy.ndarray(shape=(l_feat, l_key),
                                       dtype=feature[0].dtype)
        sift_descriptor = numpy.ndarray(shape=(l_feat, l_desc),
                                        dtype=feature[0].dtype)

        # Separate the keypoints and the descriptors.
        for k, val in enumerate(feature):
            sift_keypoints[k] = val[0:4]
            sift_descriptor[k] = val[4:]

        # For this implementation, only descriptors are needed.
        bob.io.save(sift_descriptor, feature_file)
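
Note: reading the descriptors back is symmetric with bob.io.save; a usage sketch:

# one row per keypoint, 128 columns (the SIFT descriptor length)
descriptors = bob.io.load(feature_file)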
Example #24
    def feature_normalization(self, indices, force=False):
        """Normalizes the list of features to have zero mean and unit variance (parallel)"""
        normalized_list = self.m_file_selector.training_feature_list()

        utils.info("UBM training: normalizing features from range(%d, %d)" %
                   indices)

        # iterate through the files and normalize the features
        for index in range(indices[0], indices[1]):
            feature = bob.io.load(str(normalized_list[index]))

            mean, std = self.m_tool.__normalize_std_array__(feature)

            if self.m_tool_chain.__check_file__(normalized_list[index], force):
                utils.debug("Skipping file '%s'" % normalized_list[index])
            else:
                utils.ensure_dir(os.path.dirname(normalized_list[index]))
                f = bob.io.HDF5File(str(normalized_list[index]), 'w')
                f.set('mean', mean)
                f.set('std', std)
                utils.debug("Saved normalized feature %s" %
                            str(normalized_list[index]))
Example #25
    def gmm_estep(self, indices, force=False):
        """Performs a single E-step of the GMM training (parallel)."""
        stats_file = self.m_configuration.gmm_stats_file % (
            self.m_args.iteration, indices[0], indices[1])

        if self.m_tool_chain.__check_file__(stats_file, force, 1000):
            utils.info(
                "UBM training: Skipping GMM E-Step since the file '%s' already exists"
                % stats_file)
        else:
            training_list = self.m_file_selector.training_feature_list()
            machine_file = self.m_configuration.gmm_intermediate_file % self.m_args.iteration
            gmm_machine = bob.machine.GMMMachine(bob.io.HDF5File(machine_file))

            utils.info("UBM training: GMM E-Step from range(%d, %d)" % indices)

            # read data
            data = numpy.vstack([
                bob.io.load(str(training_list[index]))
                for index in range(indices[0], indices[1])
            ])

            gmm_trainer = bob.trainer.ML_GMMTrainer(
                self.m_tool.m_update_means, self.m_tool.m_update_variances,
                self.m_tool.m_update_weights)
            gmm_trainer.responsibilities_threshold = self.m_tool.m_responsibility_threshold
            gmm_trainer.initialize(gmm_machine, data)

            # Calls the E-step and extracts the GMM statistics
            gmm_trainer.e_step(gmm_machine, data)
            gmm_stats = gmm_trainer.gmm_statistics

            # Saves the GMM statistics to the file
            utils.ensure_dir(os.path.dirname(stats_file))
            gmm_stats.save(bob.io.HDF5File(stats_file, 'w'))
            utils.info("UBM training: Wrote GMM stats '%s'" % (stats_file))
Example #26
def train_ivector(train_features, input_ubm_file):
    # load UBM
    ubm = bob.machine.GMMMachine(bob.io.HDF5File(input_ubm_file))

    # load GMM stats from training files
    gmm_stats = load_gmm_stats_list(input_ubm_file, train_features)

    # Training IVector enroller
    output_file = 'model/enroller_ivector.hdf5'

    print "IVector training"
    # Perform IVector initialization
    ivector_machine = bob.machine.IVectorMachine(ubm, subspace_dimension_of_t)
    ivector_machine.variance_threshold = variance_threshold

    # Creates the IVectorTrainer and trains the ivector machine
    ivector_trainer = bob.trainer.IVectorTrainer(
        update_sigma=True,
        convergence_threshold=variance_threshold,
        max_iterations=max_iterations)
    ivector_trainer.train(ivector_machine, gmm_stats)
    utils.ensure_dir(os.path.dirname(output_file))
    ivector_machine.save(bob.io.HDF5File(output_file, 'w'))
    print "IVector training: saved enroller's IVector machine base to '%s'" % output_file
Example #27
  def zt_norm_file(self, model_id, group):
    """Returns the score text file after ZT-normalization for the given model id of the given group."""
    zt_norm_dir = os.path.join(self.score_directories[1], group)
    utils.ensure_dir(zt_norm_dir)
    return os.path.join(zt_norm_dir, str(model_id) + ".txt")
Example #28
  def no_norm_result_file(self, group):
    """Returns the resulting score text file for the given group."""
    no_norm_dir = self.score_directories[0]
    utils.ensure_dir(no_norm_dir)
    return os.path.join(no_norm_dir, "scores-" + group)
Example #29
    def enroll_models(self,
                      tool,
                      extractor,
                      compute_zt_norm,
                      indices=None,
                      groups=['dev', 'eval'],
                      types=['N', 'T'],
                      force=False):
        """Enroll the models for 'dev' and 'eval' groups, for both models and T-Norm-models.
       This function uses the extracted or projected features to compute the models,
       depending on your setup of the base class Tool."""

        # read the projector file, if needed
        tool.load_projector(self.m_file_selector.projector_file)
        # read the model enrollment file
        tool.load_enroller(self.m_file_selector.enroller_file)

        # which tool to use to read the features...
        reader = tool if tool.use_projected_features_for_enrollment else extractor

        # Create Models
        if 'N' in types:
            for group in groups:
                model_ids = self.m_file_selector.model_ids(group)

                if indices is not None:
                    model_ids = model_ids[indices[0]:indices[1]]
                    utils.info("- Enrollment: splitting of index range %s" %
                               str(indices))

                utils.info("- Enrollment: enrolling models of group '%s'" %
                           group)
                for model_id in model_ids:
                    # Path to the model
                    model_file = self.m_file_selector.model_file(
                        model_id, group)

                    # Removes old file if required
                    if not self.__check_file__(model_file, force):
                        enroll_files = self.m_file_selector.enroll_files(
                            model_id, group,
                            'projected' if tool.use_projected_features_for_enrollment
                            else 'features')

                        # load all files into memory
                        enroll_features = [
                            reader.read_feature(str(enroll_file))
                            for enroll_file in enroll_files
                        ]

                        model = tool.enroll(enroll_features)
                        # save the model
                        utils.ensure_dir(os.path.dirname(model_file))
                        tool.save_model(model, str(model_file))

        # T-Norm-Models
        if 'T' in types and compute_zt_norm:
            for group in groups:
                t_model_ids = self.m_file_selector.t_model_ids(group)

                if indices is not None:
                    t_model_ids = t_model_ids[indices[0]:indices[1]]
                    utils.info("- Enrollment: splitting of index range %s" %
                               str(indices))

                utils.info("- Enrollment: enrolling T-models of group '%s'" %
                           group)
                for t_model_id in t_model_ids:
                    # Path to the model
                    t_model_file = self.m_file_selector.t_model_file(
                        t_model_id, group)

                    # Removes old file if required
                    if not self.__check_file__(t_model_file, force):
                        t_enroll_files = self.m_file_selector.t_enroll_files(
                            t_model_id, group,
                            'projected' if tool.use_projected_features_for_enrollment
                            else 'features')

                        # load all files into memory
                        t_enroll_features = [
                            reader.read_feature(str(t_enroll_file))
                            for t_enroll_file in t_enroll_files
                        ]

                        t_model = tool.enroll(t_enroll_features)
                        # save model
                        utils.ensure_dir(os.path.dirname(t_model_file))
                        tool.save_model(t_model, str(t_model_file))
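
Note: what tool.enroll does depends on the tool; for the UBM-GMM tool built throughout these examples it is typically MAP adaptation of the UBM to the client features. A sketch under that assumption (names such as m_ubm and m_relevance_factor are illustrative, and the bob 1.x MAP_GMMTrainer API is assumed):

def enroll(self, enroll_features):
    """Enrolls a client model by MAP-adapting the UBM (a sketch)."""
    data = numpy.vstack(enroll_features)
    # start from a copy of the UBM and adapt its means to the client data
    gmm = bob.machine.GMMMachine(self.m_ubm)
    # arguments: relevance_factor, update_means, update_variances, update_weights
    trainer = bob.trainer.MAP_GMMTrainer(self.m_relevance_factor, True, False, False)
    trainer.set_prior_gmm(self.m_ubm)
    trainer.train(gmm, data)
    return gmm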
Example #30
  def c_file_for_model(self, model_id, group):
    """Returns the C-file for the given model id that is used for computing ZT normalization."""
    c_dir = os.path.join(self.zt_score_directories[2], group)
    utils.ensure_dir(c_dir)
    return os.path.join(c_dir, str(model_id) + self.default_extension)
Example #31
  def d_file(self, t_model_id, group):
    """Returns the D-file for the given T-model id that is used for computing ZT normalization."""
    d_dir = os.path.join(self.zt_score_directories[3], group)
    utils.ensure_dir(d_dir)
    return os.path.join(d_dir, str(t_model_id) + self.default_extension)
Example #32
  def d_matrix_file(self, group):
    """Returns the D-file for storing all scores for pairs of T-models and Z-probes."""
    d_dir = os.path.join(self.zt_score_directories[3], group)
    utils.ensure_dir(d_dir)
    return os.path.join(d_dir, "D" + self.default_extension)
Example #33
  def d_same_value_file(self, t_model_id, group):
    """Returns the specific D-file for storing which pairs of the given T-model id and all Z-probes are intrapersonal or extrapersonal."""
    d_dir = os.path.join(self.zt_score_directories[4], group)
    utils.ensure_dir(d_dir)
    return os.path.join(d_dir, str(t_model_id) + self.default_extension)
Example #34
  def d_same_value_matrix_file(self, group):
    """Returns the specific D-file for storing which pairs of T-models and Z-probes are intrapersonal or extrapersonal."""
    d_dir = os.path.join(self.zt_score_directories[4], group)
    utils.ensure_dir(d_dir)
    return os.path.join(d_dir, "D_sameValue" + self.default_extension)
Example #35
  def calibrated_score_file(self, group, zt_norm=False):
    """Returns the file where the calibrated scores for the given group are written."""
    calibration_dir = self.score_directories[1 if zt_norm else 0]
    utils.ensure_dir(calibration_dir)
    return os.path.join(calibration_dir, "calibrated-" + group)
Example #36
    def kmeans_mstep(self, counts, force=False):
        """Performs a single M-step of the K-Means algorithm (non-parallel)"""
        old_machine_file = self.m_configuration.kmeans_intermediate_file % self.m_args.iteration
        new_machine_file = self.m_configuration.kmeans_intermediate_file % (
            self.m_args.iteration + 1)

        if self.m_tool_chain.__check_file__(new_machine_file, force, 1000):
            utils.info(
                "UBM training: Skipping KMeans M-Step since the file '%s' already exists"
                % new_machine_file)
        else:
            # get the files from e-step
            training_list = self.m_file_selector.training_feature_list()

            # check whether there is a single file containing all the data
            if os.path.exists(self.m_configuration.kmeans_stats_file %
                              (self.m_args.iteration, 0, len(training_list))):
                stats_file = self.m_configuration.kmeans_stats_file % (
                    self.m_args.iteration, 0, len(training_list))
                # load stats file
                zeroeth, first, nsamples, dist = self.read_stats(stats_file)
            else:
                # load several files
                job_ids = range(
                    self.__generate_job_array__(training_list, counts)[1])
                job_indices = [(counts * job_id,
                                min(counts * (job_id + 1), len(training_list)))
                               for job_id in job_ids]
                stats_files = [
                    self.m_configuration.kmeans_stats_file %
                    (self.m_args.iteration, indices[0], indices[1])
                    for indices in job_indices
                ]

                # read all stats files
                zeroeth, first, nsamples, dist = self.read_stats(
                    stats_files[0])
                for stats_file in stats_files[1:]:
                    zeroeth_, first_, nsamples_, dist_ = self.read_stats(
                        stats_file)
                    zeroeth += zeroeth_
                    first += first_
                    nsamples += nsamples_
                    dist += dist_

            # read one feature file: initialize() needs data of the right shape, but the M-step itself does not use it
            data = numpy.array(bob.io.load(str(training_list[0])))

            # Creates the KMeansTrainer
            kmeans_trainer = bob.trainer.KMeansTrainer()
            # Creates the KMeansMachine
            kmeans_machine = bob.machine.KMeansMachine(
                bob.io.HDF5File(old_machine_file))
            kmeans_trainer.initialize(kmeans_machine, data)

            kmeans_trainer.zeroeth_order_statistics = zeroeth
            kmeans_trainer.first_order_statistics = first
            kmeans_trainer.average_min_distance = dist

            # Performs the M-step
            kmeans_trainer.m_step(kmeans_machine,
                                  data)  # data is not used in M-step
            utils.info("UBM training: Performed M step %d with result %f" %
                       (self.m_args.iteration, dist / nsamples))

            # Save the K-Means model
            utils.ensure_dir(os.path.dirname(new_machine_file))
            kmeans_machine.save(bob.io.HDF5File(new_machine_file, 'w'))
            shutil.copy(new_machine_file, self.m_configuration.kmeans_file)
            utils.info("UBM training: Wrote new KMeans machine '%s'" %
                       new_machine_file)

        if self.m_args.clean_intermediate and self.m_args.iteration > 0:
            old_file = self.m_configuration.kmeans_intermediate_file % (
                self.m_args.iteration - 1)
            utils.info("Removing old intermediate directory '%s'" %
                       os.path.dirname(old_file))
            shutil.rmtree(os.path.dirname(old_file))
Example #37
    def gmm_mstep(self, counts, force=False):
        """Performs a single M-step of the GMM training (non-parallel)"""
        old_machine_file = self.m_configuration.gmm_intermediate_file % self.m_args.iteration
        new_machine_file = self.m_configuration.gmm_intermediate_file % (
            self.m_args.iteration + 1)

        if self.m_tool_chain.__check_file__(new_machine_file, force, 1000):
            utils.info(
                "UBM training: Skipping GMM M-Step since the file '%s' already exists"
                % new_machine_file)
        else:
            # get the files from e-step
            training_list = self.m_file_selector.training_feature_list()

            # check whether there is a single file containing all the data
            if os.path.exists(self.m_configuration.gmm_stats_file %
                              (self.m_args.iteration, 0, len(training_list))):
                stats_file = self.m_configuration.gmm_stats_file % (
                    self.m_args.iteration, 0, len(training_list))
                # load stats file
                gmm_stats = bob.machine.GMMStats(bob.io.HDF5File(stats_file))
            else:
                # load several files
                job_ids = range(
                    self.__generate_job_array__(training_list, counts)[1])
                job_indices = [(counts * job_id,
                                min(counts * (job_id + 1), len(training_list)))
                               for job_id in job_ids]
                stats_files = [
                    self.m_configuration.gmm_stats_file %
                    (self.m_args.iteration, indices[0], indices[1])
                    for indices in job_indices
                ]

                # read all stats files
                gmm_stats = bob.machine.GMMStats(
                    bob.io.HDF5File(stats_files[0]))
                for stats_file in stats_files[1:]:
                    gmm_stats += bob.machine.GMMStats(
                        bob.io.HDF5File(stats_file))

            # read one feature file: initialize() needs data of the right shape, but the M-step itself does not use it
            data = numpy.array(bob.io.load(str(training_list[0])))

            # load the old gmm machine
            gmm_machine = bob.machine.GMMMachine(
                bob.io.HDF5File(old_machine_file))
            # initialize the trainer
            gmm_trainer = bob.trainer.ML_GMMTrainer(
                self.m_tool.m_update_means, self.m_tool.m_update_variances,
                self.m_tool.m_update_weights)
            gmm_trainer.responsibilities_threshold = self.m_tool.m_responsibility_threshold
            gmm_trainer.initialize(gmm_machine, data)
            gmm_trainer.gmm_statistics = gmm_stats

            # Calls M-step
            gmm_trainer.m_step(gmm_machine, data)

            # Saves the GMM statistics to the file
            utils.ensure_dir(os.path.dirname(new_machine_file))
            gmm_machine.save(bob.io.HDF5File(new_machine_file, 'w'))
            # shutil is assumed to be imported at module level (it is also used below)
            shutil.copy(new_machine_file, self.m_configuration.projector_file)

        if self.m_args.clean_intermediate and self.m_args.iteration > 0:
            old_file = self.m_configuration.gmm_intermediate_file % (
                self.m_args.iteration - 1)
            utils.info("Removing old intermediate directory '%s'" %
                       os.path.dirname(old_file))
            shutil.rmtree(os.path.dirname(old_file))
Example #38
  def no_norm_file(self, model_id, group):
    """Returns the score text file for the given model id of the given group."""
    no_norm_dir = os.path.join(self.score_directories[0], group)
    utils.ensure_dir(no_norm_dir)
    return os.path.join(no_norm_dir, str(model_id) + ".txt")
Example #39
  def zt_norm_result_file(self, group):
    """Returns the resulting score text file after ZT-normalization for the given group."""
    zt_norm_dir = self.score_directories[1]
    utils.ensure_dir(zt_norm_dir)
    return os.path.join(zt_norm_dir, "scores-" + group)