Example 1
    def project_features(self, tool, extractor, indices=None, force=False):
        """Projects the features for all files of the database."""
        # load the projector file
        if tool.performs_projection:
            tool.load_projector(str(self.m_file_selector.projector_file))

            feature_files = self.m_file_selector.feature_list()
            projected_files = self.m_file_selector.projected_list()

            # select a subset of indices to iterate
            if indices is not None:
                index_range = range(indices[0], indices[1])
                utils.info("- Projection: splitting of index range %s" %
                           str(indices))
            else:
                index_range = range(len(feature_files))

            utils.ensure_dir(self.m_file_selector.projected_directory)
            utils.info(
                "- Projection: projecting %d features from directory '%s' to directory '%s'"
                % (len(index_range), self.m_file_selector.features_directory,
                   self.m_file_selector.projected_directory))
            # project the features
            for i in index_range:
                feature_file = feature_files[i]
                projected_file = projected_files[i]

                if not self.__check_file__(projected_file, force):
                    # load feature
                    feature = extractor.read_feature(str(feature_file))
                    # project feature
                    projected = tool.project(feature)
                    # write it
                    utils.ensure_dir(os.path.dirname(projected_file))
                    tool.save_feature(projected, str(projected_file))
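
The method above only assumes a small, duck-typed interface on the recognition tool: `performs_projection`, `load_projector`, `project` and `save_feature`. A minimal sketch of such a tool follows; the `IdentityProjector` class is hypothetical and only illustrates the expected attributes, reusing the `bob.io.save` call seen in the other examples:

import bob


class IdentityProjector(object):
    """Toy projector that passes features through unchanged."""
    performs_projection = True

    def load_projector(self, projector_file):
        # nothing to load for this toy projector
        pass

    def project(self, feature):
        # identity "projection"
        return feature

    def save_feature(self, projected, projected_file):
        bob.io.save(projected, projected_file)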
Example 2
    def __train_kmeans__(self, feature_space):
        """Compute KMeans classification of the data"""

        utils.info("  -> Training KMeans")

        # Form the feature space for training KMeans.
        data_list = []
        for client in feature_space:
            for feature in client:
                data_list.append(feature)
        data = numpy.vstack(data_list)
        del data_list

        # Compute the number of clusters for KMeans (a float value is interpreted as a fraction of the number of samples).
        self.m_kmeans_means = numpy.uint32(
            data.shape[0] * self.m_kmeans_means) if isinstance(
                self.m_kmeans_means, float) else self.m_kmeans_means

        # Machine.
        dimension = feature_space[0].shape[1]
        kmeans = bob.machine.KMeansMachine(self.m_kmeans_means, dimension)

        # Training.
        t = bob.trainer.KMeansTrainer()
        t.max_iterations = self.m_kmeans_training_iterations
        t.convergence_threshold = self.m_kmeans_training_threshold

        t.train(kmeans, data)
        del data

        # Return machine.
        return kmeans
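
Outside of the tool chain, the same K-Means training pattern can be exercised directly on a synthetic feature matrix. A minimal sketch, assuming the legacy monolithic `bob` 1.x API used above:

import numpy
import bob

# 1000 synthetic 4-dimensional samples
data = numpy.random.randn(1000, 4)

# 8 clusters in a 4-dimensional space
kmeans = bob.machine.KMeansMachine(8, 4)

trainer = bob.trainer.KMeansTrainer()
trainer.max_iterations = 25
trainer.convergence_threshold = 1e-5
trainer.train(kmeans, data)

print(kmeans.means.shape)  # (8, 4)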
Example 3
    def train_enroller(self, tool, extractor, force=False):
        """Trains the model enroller using the extracted or projected features, depending on your setup of the base class Tool."""
        reader = tool if tool.use_projected_features_for_enrollment else extractor
        if tool.requires_enroller_training:
            enroller_file = self.m_file_selector.enroller_file

            if self.__check_file__(enroller_file, force, 1000):
                utils.info("- Enrollment: enroller '%s' already exists." %
                           enroller_file)
            else:
                utils.ensure_dir(os.path.dirname(enroller_file))
                # first, load the projector
                tool.load_projector(str(self.m_file_selector.projector_file))
                # training models
                train_files = self.m_file_selector.training_list(
                    'projected' if tool.use_projected_features_for_enrollment
                    else 'features',
                    'train_enroller',
                    arrange_by_client=True)
                train_features = self.__read_features_by_client__(
                    train_files, reader)

                # perform training
                utils.info(
                    "- Enrollment: training enroller '%s' using %d identities: "
                    % (enroller_file, len(train_features)))
                tool.train_enroller(train_features, str(enroller_file))
Example 4
    def extract_features(self,
                         extractor,
                         preprocessor,
                         indices=None,
                         force=False):
        """Extracts the features from the preprocessed data using the given extractor."""
        extractor.load(str(self.m_file_selector.extractor_file))
        data_files = self.m_file_selector.preprocessed_data_list()
        feature_files = self.m_file_selector.feature_list()

        # select a subset of indices to iterate
        if indices is not None:
            index_range = range(indices[0], indices[1])
            utils.info("- Extraction: splitting of index range %s" %
                       str(indices))
        else:
            index_range = range(len(data_files))

        utils.ensure_dir(self.m_file_selector.features_directory)
        utils.info(
            "- Extraction: extracting %d features from directory '%s' to directory '%s'"
            % (len(index_range), self.m_file_selector.preprocessed_directory,
               self.m_file_selector.features_directory))
        for i in index_range:
            data_file = data_files[i]
            feature_file = feature_files[i]

            if not self.__check_file__(feature_file, force):
                # load data
                data = preprocessor.read_data(str(data_file))
                # extract feature
                feature = extractor(data, data_file)
                # Save feature
                utils.ensure_dir(os.path.dirname(feature_file))
                extractor.save_feature(feature, str(feature_file))
Example 5
def _plot_scores(figsize, args, totalModels, ids_group, group, title):
  figure = mpl.figure(figsize=figsize)
  
  eer_mean = []
  thr_mean = [] 
  print("List of the non zero EER Models:")
  for model in range(len(totalModels)):
    
    scores = ids_group[ids_group[:,0] == totalModels[model], :]
    scoresTarget = scores[scores[:,4]=='1',3].astype(numpy.float64)
    scoresNonTarget = scores[scores[:,4]=='0',3].astype(numpy.float64)
    
    # compute threshold on development set
    threshold = {'EER': bob.measure.eer_threshold, 'HTER' : bob.measure.min_hter_threshold} [args.criterion](scoresNonTarget, scoresTarget)
    thr_mean.append(threshold)
    # apply threshold to development set
    far, frr = bob.measure.farfrr(scoresNonTarget, scoresTarget, threshold)
    #print("Model %s - The %s of the development set of '%s' is %2.3f%%" % (model, args.criterion, args.legends[i] if args.legends else args.dev_files[i], (far + frr) * 50.)) # / 2 * 100%
    eer = 100. * bob.measure.eer_rocch(scoresNonTarget, scoresTarget)
    eer_mean.append(eer)
    if (eer != 0):
      print("Model %s - The %s of the %s set is %2.3f%%" % (totalModels[model], args.criterion, group, eer )) # / 2 * 100%

    utils.info("Plotting Fauna graph to file '%s'" % args.pdf)
    # plot scoresTarget
    blue_dot,  = mpl.plot(numpy.ones(len(scoresTarget))*model, scoresTarget, 'bo')
    # plot scoresNonTarget        
    red_cross, = mpl.plot(numpy.ones(len(scoresNonTarget))*model, scoresNonTarget, 'rx')   
    # plot threshold
    black_th,  = mpl.plot(model, threshold, 'k*', markersize=16)
  
  # finalize plot
  offset = 0.01
  mpl.ylabel('Score norm')
  if (args.norm == 'none'):
    offset = numpy.mean(ids_group[:,3].astype(numpy.float64))/100
    mpl.ylabel('Score')

  mpl.axis([-1,len(totalModels), min(ids_group[:,3].astype(numpy.float64))-offset, max(ids_group[:,3].astype(numpy.float64))+offset])
  mpl.xticks(range(0,len(totalModels)+1,5))
  #mpl.xticks(range(0,len(totalModels),10), totalModels[range(0,len(totalModels),10)].astype(numpy.str), rotation = 'vertical')
  mpl.xlabel('User model')
  mpl.grid(True, color=(0.6,0.6,0.6))
  
  #thr_mean = {'EER': bob.measure.eer_threshold, 'HTER' : bob.measure.min_hter_threshold} [args.criterion](scores_dev[i][0], scores_dev[i][1])
  #mpl.axhline(y=thr_mean, xmin=0, xmax=1, c="blue", linewidth=1.5, zorder=0)
  
  #mpl.text(60, .025, r'$\mu=100,\ \sigma=15$')
  #mpl.annotate('local max', xy=(2, 1), xytext=(3, 1.5),arrowprops=dict(facecolor='black', shrink=0.05),)
  mpl.legend([blue_dot, red_cross, black_th], ["Target scores", "NonTarget scores", "Model threshold"], numpoints=1, loc=5)
  #mpl.legend(handles = [blue_dot, red_cross, black_th],["Target scores", "NonTarget scores", "Model threshold"])
  
  mpl.title(title)

  return eer_mean, figure
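
The per-model error measures in `_plot_scores` come straight from `bob.measure`. A minimal sketch of those calls on synthetic target/non-target scores (legacy `bob` 1.x API as above):

import numpy
import bob

negatives = numpy.random.normal(-1.0, 1.0, 500)  # non-target (impostor) scores
positives = numpy.random.normal(+1.0, 1.0, 500)  # target (genuine) scores

threshold = bob.measure.eer_threshold(negatives, positives)
far, frr = bob.measure.farfrr(negatives, positives, threshold)
eer = 100. * bob.measure.eer_rocch(negatives, positives)

print("threshold = %f, HTER = %2.3f%%, EER = %2.3f%%"
      % (threshold, (far + frr) * 50., eer))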
Example 6
    def calibrate_scores(self,
                         norms=['nonorm', 'ztnorm'],
                         groups=['dev', 'eval'],
                         prior=0.5):
        """Calibrates the score files by learning a linear calibration from the dev files (first element of the groups) and executing the on all groups, separately for all given norms."""
        # read score files of the first group
        for norm in norms:
            training_score_file = self.m_file_selector.no_norm_result_file(
                groups[0]
            ) if norm == 'nonorm' else self.m_file_selector.zt_norm_result_file(
                groups[0]) if norm == 'ztnorm' else None

            # create a LLR trainer
            utils.info(
                " - Calibration: Training calibration for type %s from group %s"
                % (norm, groups[0]))
            llr_trainer = bob.trainer.CGLogRegTrainer(prior, 1e-16, 100000)

            training_scores = list(
                bob.measure.load.split_four_column(training_score_file))
            for i in (0, 1):
                h = numpy.array(training_scores[i])
                h.shape = (len(training_scores[i]), 1)
                training_scores[i] = h
            # train the LLR
            llr_machine = llr_trainer.train(training_scores[0],
                                            training_scores[1])
            del training_scores
            utils.debug(
                "   ... Resulting calibration parameters: shift = %f, scale = %f"
                % (llr_machine.biases[0], llr_machine.weights[0, 0]))

            # now, apply it to all groups
            for group in groups:
                score_file = self.m_file_selector.no_norm_result_file(
                    group
                ) if norm == 'nonorm' else self.m_file_selector.zt_norm_result_file(
                    group) if norm == 'ztnorm' else None
                calibrated_file = self.m_file_selector.calibrated_score_file(
                    group, norm == 'ztnorm')

                utils.info(
                    " - Calibration: calibrating scores from '%s' to '%s'" %
                    (score_file, calibrated_file))

                # iterate through the score file and calibrate scores
                scores = bob.measure.load.four_column(score_file)
                with open(calibrated_file, 'w') as f:
                    for line in scores:
                        assert len(line) == 4
                        calibrated_score = llr_machine([line[3]])
                        f.write('%s %s %s ' % line[0:3] +
                                str(calibrated_score[0]) + "\n")
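
The calibration itself is a two-parameter linear logistic regression. A minimal sketch of the `CGLogRegTrainer` usage above on synthetic scores (the trainer expects column vectors, as in the reshaping code of the method):

import numpy
import bob

negatives = numpy.random.normal(-1.0, 1.0, (500, 1))  # impostor scores as a column vector
positives = numpy.random.normal(+1.0, 1.0, (500, 1))  # genuine scores as a column vector

llr_trainer = bob.trainer.CGLogRegTrainer(0.5, 1e-16, 100000)
llr_machine = llr_trainer.train(negatives, positives)

raw_score = 0.3
calibrated = llr_machine([raw_score])[0]
print("shift = %f, scale = %f, calibrated score = %f"
      % (llr_machine.biases[0], llr_machine.weights[0, 0], calibrated))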
Example 7
    def __scores_d__(self, t_model_ids, group, force, preload_probes):
        """Computes D scores."""
        # probe files:
        z_probe_objects = self.m_file_selector.z_probe_objects(group)
        z_probe_files = self.m_file_selector.get_paths(
            z_probe_objects,
            'projected' if self.m_use_projected_dir else 'features')

        # preload the probe files for faster access (and to reduce network load)
        if preload_probes:
            utils.info("- Scoring: preloading Z-probe files of group '%s'" %
                       group)
            # read all probe files into memory
            if self.m_file_selector.uses_probe_file_sets():
                preloaded_z_probes = [[
                    self.m_tool.read_probe(str(z_probe_file))
                    for z_probe_file in file_set
                ] for file_set in z_probe_files]
            else:
                preloaded_z_probes = [
                    self.m_tool.read_probe(str(z_probe_file))
                    for z_probe_file in z_probe_files
                ]

        utils.info("- Scoring: computing score matrix D for group '%s'" %
                   group)

        # Gets the Z-Norm impostor samples
        z_probe_ids = []
        for z_probe_object in z_probe_objects:
            z_probe_ids.append(z_probe_object.client_id)

        # Loads the T-Norm models
        for t_model_id in t_model_ids:
            # test if the file is already there
            score_file = self.m_file_selector.d_same_value_file(
                t_model_id, group)
            if self.__check_file__(score_file, force):
                utils.warn("score file '%s' already exists." % (score_file))
            else:
                t_model = self.m_tool.read_model(
                    self.m_file_selector.t_model_file(t_model_id, group))
                if preload_probes:
                    d = self.__scores_preloaded__(t_model, preloaded_z_probes)
                else:
                    d = self.__scores__(t_model, z_probe_files)
                bob.io.save(d, self.m_file_selector.d_file(t_model_id, group))

                t_client_id = [self.m_file_selector.client_id(t_model_id)]
                d_same_value_tm = bob.machine.ztnorm_same_value(
                    t_client_id, z_probe_ids)
                bob.io.save(d_same_value_tm, score_file)
Example 8
  def gmm_mstep(self, counts, force=False):
    """Performs a single M-step of the GMM training (non-parallel)"""
    old_machine_file = self.m_configuration.gmm_intermediate_file % self.m_args.iteration
    new_machine_file = self.m_configuration.gmm_intermediate_file % (self.m_args.iteration + 1)

    if  self.m_tool_chain.__check_file__(new_machine_file, force, 1000):
      utils.info("UBM training: Skipping GMM M-Step since the file '%s' already exists" % new_machine_file)
    else:
      # get the files from e-step
      training_list = self.m_file_selector.training_feature_list()

      # check whether there is one file containing all data
      if os.path.exists(self.m_configuration.gmm_stats_file % (self.m_args.iteration, 0, len(training_list))):
        stats_file = self.m_configuration.gmm_stats_file % (self.m_args.iteration, 0, len(training_list))
        # load stats file
        gmm_stats = bob.machine.GMMStats(bob.io.HDF5File(stats_file))
      else:
        # load several files
        job_ids = range(self.__generate_job_array__(training_list, counts)[1])
        job_indices = [(counts * job_id, min(counts * (job_id+1), len(training_list))) for job_id in job_ids]
        stats_files = [self.m_configuration.gmm_stats_file % (self.m_args.iteration, indices[0], indices[1]) for indices in job_indices]

        # read all stats files
        gmm_stats = bob.machine.GMMStats(bob.io.HDF5File(stats_files[0]))
        for stats_file in stats_files[1:]:
          gmm_stats += bob.machine.GMMStats(bob.io.HDF5File(stats_file))

      # read some features (needed for computation, but not really required)
      data = numpy.array(bob.io.load(str(training_list[0])))

      # load the old gmm machine
      gmm_machine =  bob.machine.GMMMachine(bob.io.HDF5File(old_machine_file))
      # initialize the trainer
      gmm_trainer = bob.trainer.ML_GMMTrainer(self.m_tool.m_update_means, self.m_tool.m_update_variances, self.m_tool.m_update_weights)
      gmm_trainer.responsibilities_threshold = self.m_tool.m_responsibility_threshold
      gmm_trainer.initialize(gmm_machine, data)
      gmm_trainer.gmm_statistics = gmm_stats

      # Calls M-step
      gmm_trainer.m_step(gmm_machine, data)

      # Saves the GMM statistics to the file
      utils.ensure_dir(os.path.dirname(new_machine_file))
      gmm_machine.save(bob.io.HDF5File(new_machine_file, 'w'))
      import shutil
      shutil.copy(new_machine_file, self.m_configuration.projector_file)

    if self.m_args.clean_intermediate and self.m_args.iteration > 0:
      old_file = self.m_configuration.gmm_intermediate_file % (self.m_args.iteration-1)
      utils.info("Removing old intermediate directory '%s'" % os.path.dirname(old_file))
      shutil.rmtree(os.path.dirname(old_file))
Example 9
  def kmeans_mstep(self, counts, force=False):
    """Performs a single M-step of the K-Means algorithm (non-parallel)"""
    old_machine_file = self.m_configuration.kmeans_intermediate_file % self.m_args.iteration
    new_machine_file = self.m_configuration.kmeans_intermediate_file % (self.m_args.iteration + 1)

    if  self.m_tool_chain.__check_file__(new_machine_file, force, 1000):
      utils.info("UBM training: Skipping KMeans M-Step since the file '%s' already exists" % new_machine_file)
    else:
      # get the files from e-step
      training_list = self.m_file_selector.training_feature_list()

      # check whether there is one file containing all data
      if os.path.exists(self.m_configuration.kmeans_stats_file % (self.m_args.iteration, 0, len(training_list))):
        stats_file = self.m_configuration.kmeans_stats_file % (self.m_args.iteration, 0, len(training_list))
        # load stats file
        zeroeth, first, nsamples, dist = self.read_stats(stats_file)
      else:
        # load several files
        job_ids = range(self.__generate_job_array__(training_list, counts)[1])
        job_indices = [(counts * job_id, min(counts * (job_id+1), len(training_list))) for job_id in job_ids]
        stats_files = [self.m_configuration.kmeans_stats_file % (self.m_args.iteration, indices[0], indices[1]) for indices in job_indices]

        # read all stats files
        zeroeth, first, nsamples, dist = self.read_stats(stats_files[0])
        for stats_file in stats_files[1:]:
          zeroeth_, first_, nsamples_, dist_ = self.read_stats(stats_file)
          zeroeth += zeroeth_
          first += first_
          nsamples += nsamples_
          dist += dist_

      # read some features (needed for computation, but not really required)
      data = numpy.array(bob.io.load(str(training_list[0])))

      # Creates the KMeansTrainer
      kmeans_trainer = bob.trainer.KMeansTrainer()
      # Creates the KMeansMachine
      kmeans_machine = bob.machine.KMeansMachine(bob.io.HDF5File(old_machine_file))
      kmeans_trainer.initialize(kmeans_machine, data)

      kmeans_trainer.zeroeth_order_statistics = zeroeth
      kmeans_trainer.first_order_statistics = first
      kmeans_trainer.average_min_distance = dist

      # Performs the M-step
      kmeans_trainer.m_step(kmeans_machine, data) # data is not used in M-step
      utils.info("UBM training: Performed M step %d with result %f" % (self.m_args.iteration, dist/nsamples))

      # Save the K-Means model
      utils.ensure_dir(os.path.dirname(new_machine_file))
      kmeans_machine.save(bob.io.HDF5File(new_machine_file, 'w'))
      shutil.copy(new_machine_file, self.m_configuration.kmeans_file)
      utils.info("UBM training: Wrote new KMeans machine '%s'" % new_machine_file)

    if self.m_args.clean_intermediate and self.m_args.iteration > 0:
      old_file = self.m_configuration.kmeans_intermediate_file % (self.m_args.iteration-1)
      utils.info("Removing old intermediate directory '%s'" % os.path.dirname(old_file))
      shutil.rmtree(os.path.dirname(old_file))
Example 10
    def concatenate(self, compute_zt_norm, groups=['dev', 'eval']):
        """Concatenates all results into one (or two) score files per group."""
        for group in groups:
            utils.info("- Scoring: concatenating score files for group '%s'" %
                       group)
            # (sorted) list of models
            model_ids = self.m_file_selector.model_ids(group)

            with open(self.m_file_selector.no_norm_result_file(group),
                      'w') as f:
                # Concatenates the scores
                for model_id in model_ids:
                    model_file = self.m_file_selector.no_norm_file(
                        model_id, group)
                    if not os.path.exists(model_file):
                        f.close()
                        os.remove(
                            self.m_file_selector.no_norm_result_file(group))
                        raise IOError(
                            "The score file '%s' cannot be found. Aborting!" %
                            model_file)

                    with open(model_file, 'r') as res_file:
                        f.write(res_file.read())

            if compute_zt_norm:
                with open(self.m_file_selector.zt_norm_result_file(group),
                          'w') as f:
                    # Concatenates the scores
                    for model_id in model_ids:
                        model_file = self.m_file_selector.zt_norm_file(
                            model_id, group)
                        if not os.path.exists(model_file):
                            f.close()
                            os.remove(
                                self.m_file_selector.zt_norm_result_file(
                                    group))
                            raise IOError(
                                "The score file '%s' cannot be found. Aborting!"
                                % model_file)

                        with open(model_file, 'r') as res_file:
                            f.write(res_file.read())
Example 11
  def feature_normalization(self, indices, force=False):
    """Normalizes the list of features to have zero mean and unit variance (parallel)"""
    normalized_list = self.m_file_selector.training_feature_list()

    utils.info("UBM training: normalizing features from range(%d, %d)" % indices)

    # iterate through the files and normalize the features
    for index in range(indices[0], indices[1]):
      feature = bob.io.load(str(training_list[index]))

      mean, std = self.m_tool.__normalize_std_array__(feature)

      if self.m_tool_chain.__check_file__(normalized_list[index], force):
        utils.debug("Skipping file '%s'" % normalized_list[index])
      else:
        utils.ensure_dir(os.path.dirname(normalized_list[index]))
        f = bob.io.HDF5File(str(normalized_list[index]), 'w')
        f.set('mean', mean)
        f.set('std', std)
        utils.debug("Saved normalized feature %s" %str(normalized_list[index]))
Example 12
    def __scores_c__(self, t_model_ids, group, force, preload_probes):
        """Computes C scores."""
        # probe files:
        probe_objects = self.m_file_selector.probe_objects(group)
        probe_files = self.m_file_selector.get_paths(
            probe_objects,
            'projected' if self.m_use_projected_dir else 'features')

        # preload the probe files for faster access (and to reduce network load)
        if preload_probes:
            utils.info("- Scoring: preloading probe files of group '%s'" %
                       group)
            # read all probe files into memory
            if self.m_file_selector.uses_probe_file_sets():
                preloaded_probes = [[
                    self.m_tool.read_probe(str(probe_file))
                    for probe_file in file_set
                ] for file_set in probe_files]
            else:
                preloaded_probes = [
                    self.m_tool.read_probe(str(probe_file))
                    for probe_file in probe_files
                ]

        utils.info("- Scoring: computing score matrix C for group '%s'" %
                   group)

        # Computes the raw scores for the T-Norm model
        for t_model_id in t_model_ids:
            # test if the file is already there
            score_file = self.m_file_selector.c_file(t_model_id, group)
            if self.__check_file__(score_file, force):
                utils.warn("score file '%s' already exists." % (score_file))
            else:
                t_model = self.m_tool.read_model(
                    self.m_file_selector.t_model_file(t_model_id, group))
                if preload_probes:
                    c = self.__scores_preloaded__(t_model, preloaded_probes)
                else:
                    c = self.__scores__(t_model, probe_files)
                bob.io.save(c, score_file)
Example 13
  def kmeans_estep(self, indices, force=False):
    """Performs a single E-step of the K-Means algorithm (parallel)"""
    stats_file = self.m_configuration.kmeans_stats_file % (self.m_args.iteration, indices[0], indices[1])

    if  self.m_tool_chain.__check_file__(stats_file, force, 1000):
      utils.info("UBM training: Skipping KMeans E-Step since the file '%s' already exists" % stats_file)
    else:
      training_list = self.m_file_selector.training_feature_list()
      machine_file = self.m_configuration.kmeans_intermediate_file % self.m_args.iteration
      kmeans_machine = bob.machine.KMeansMachine(bob.io.HDF5File(machine_file))

      utils.info("UBM training: KMeans E-Step from range(%d, %d)" % indices)

      # read data
      data = numpy.vstack([bob.io.load(str(training_list[index])) for index in range(indices[0], indices[1])])

      kmeans_trainer = bob.trainer.KMeansTrainer()
      t = bob.machine.KMeansMachine(self.m_tool.m_gaussians, data.shape[1]) # Temporary Kmeans machine required for trainer initialization
      kmeans_trainer.initialize(t, data)

      # Performs the E-step
      kmeans_trainer.e_step(kmeans_machine, data)

      # write results to file
      dist = numpy.array([kmeans_trainer.average_min_distance])
      nsamples = numpy.array([indices[1] - indices[0]], dtype=numpy.float64)

      utils.ensure_dir(os.path.dirname(stats_file))
      f = bob.io.HDF5File(stats_file, 'w')
      f.set('zeros', kmeans_trainer.zeroeth_order_statistics)
      f.set('first', kmeans_trainer.first_order_statistics)
      f.set('dist', dist * nsamples)
      f.set('nsamples', nsamples)
      utils.info("UBM training: Wrote Stats file '%s'" % stats_file)
Example 14
  def gmm_initialize(self, force=False):
    """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
    This might require a lot of memory."""
    output_file = self.m_configuration.gmm_intermediate_file % 0

    if self.m_tool_chain.__check_file__(output_file, force, 800):
      utils.info("UBM Training: Skipping GMM initialization since '%s' already exists" % output_file)
    else:
      training_list = self.m_file_selector.training_feature_list()
      utils.info("UBM Training: Initializing GMM")

      # load KMeans machine
      kmeans_machine = bob.machine.KMeansMachine(bob.io.HDF5File(self.m_configuration.kmeans_file))

      # read features
      data = numpy.vstack([bob.io.load(str(training_list[index])) for index in utils.quasi_random_indices(len(training_list), self.m_args.limit_training_examples)])

      # Create initial GMM Machine
      gmm_machine = bob.machine.GMMMachine(self.m_tool.m_gaussians, data.shape[1])

      [variances, weights] = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

      # Initializes the GMM
      gmm_machine.means = kmeans_machine.means
      gmm_machine.variances = variances
      gmm_machine.weights = weights
      gmm_machine.set_variance_thresholds(self.m_tool.m_variance_threshold)

      utils.ensure_dir(os.path.dirname(output_file))
      gmm_machine.save(bob.io.HDF5File(output_file, 'w'))
      utils.info("UBM Training: Wrote GMM file '%s'" % output_file)
Example 15
    def kmeans_initialize(self, force=False):
        """Initializes the K-Means training (non-parallel)."""
        output_file = self.m_configuration.kmeans_intermediate_file % 0

        if self.m_tool_chain.__check_file__(output_file, force, 1000):
            utils.info(
                "UBM training: Skipping KMeans initialization since the file '%s' already exists"
                % output_file)
        else:
            # read data
            utils.info("UBM training: initializing kmeans")
            training_list = self.m_file_selector.training_feature_list()
            data = numpy.vstack([
                bob.io.load(str(training_list[index]))
                for index in utils.quasi_random_indices(
                    len(training_list), self.m_args.limit_training_examples)
            ])

            # Perform KMeans initialization
            kmeans_machine = bob.machine.KMeansMachine(self.m_tool.m_gaussians,
                                                       data.shape[1])
            # Creates the KMeansTrainer and call the initialization procedure
            kmeans_trainer = bob.trainer.KMeansTrainer()
            kmeans_trainer.initialize(kmeans_machine, data)
            utils.ensure_dir(os.path.dirname(output_file))
            kmeans_machine.save(bob.io.HDF5File(output_file, 'w'))
            utils.info("UBM training: saved initial KMeans machine to '%s'" %
                       output_file)
Example 16
    def preprocess_data(self, preprocessor, indices=None, force=False):
        """Preprocesses the original data with the given preprocessor."""
        # get the file lists
        data_files = self.m_file_selector.original_data_list()
        preprocessed_data_files = self.m_file_selector.preprocessed_data_list()

        # select a subset of keys to iterate
        if indices is not None:
            index_range = range(indices[0], indices[1])
            utils.info("- Preprocessing: splitting of index range %s" %
                       str(indices))
        else:
            index_range = range(len(data_files))

        utils.ensure_dir(self.m_file_selector.preprocessed_directory)
        utils.info(
            "- Preprocessing: processing %d data files from directory '%s' to directory '%s'"
            % (len(index_range),
               self.m_file_selector.m_database.original_directory,
               self.m_file_selector.preprocessed_directory))

        # read annotation files
        annotation_list = self.m_file_selector.annotation_list()

        for i in index_range:
            preprocessed_data_file = preprocessed_data_files[i]

            if not self.__check_file__(preprocessed_data_file, force):
                data = preprocessor.read_original_data(str(data_files[i]))

                # get the annotations; might be None
                annotations = self.m_file_selector.get_annotations(
                    annotation_list[i])

                # call the preprocessor
                preprocessed_data = preprocessor(data, annotations)

                utils.ensure_dir(os.path.dirname(preprocessed_data_file))
                preprocessor.save_data(preprocessed_data,
                                       str(preprocessed_data_file))
Example 17
    def __scores_b__(self, model_ids, group, force, preload_probes):
        """Computes B scores."""
        # probe files:
        z_probe_objects = self.m_file_selector.z_probe_objects(group)
        z_probe_files = self.m_file_selector.get_paths(
            z_probe_objects,
            'projected' if self.m_use_projected_dir else 'features')
        # preload the probe files for faster access (and to reduce network load)
        if preload_probes:
            utils.info("- Scoring: preloading Z-probe files of group '%s'" %
                       group)
            # read all probe files into memory
            if self.m_file_selector.uses_probe_file_sets():
                preloaded_z_probes = [[
                    self.m_tool.read_probe(str(z_probe_file))
                    for z_probe_file in file_set
                ] for file_set in z_probe_files]
            else:
                preloaded_z_probes = [
                    self.m_tool.read_probe(str(z_probe_file))
                    for z_probe_file in z_probe_files
                ]

        utils.info("- Scoring: computing score matrix B for group '%s'" %
                   group)

        # Loads the models
        for model_id in model_ids:
            # test if the file is already there
            score_file = self.m_file_selector.b_file(model_id, group)
            if self.__check_file__(score_file, force):
                utils.warn("score file '%s' already exists." % (score_file))
            else:
                model = self.m_tool.read_model(
                    self.m_file_selector.model_file(model_id, group))
                if preload_probes:
                    b = self.__scores_preloaded__(model, preloaded_z_probes)
                else:
                    b = self.__scores__(model, z_probe_files)
                bob.io.save(b, score_file)
Example 18
    def train_extractor(self, extractor, preprocessor, force=False):
        """Trains the feature extractor using preprocessed data of the 'world' set, if the feature extractor requires training."""
        if extractor.requires_training:
            extractor_file = self.m_file_selector.extractor_file
            if self.__check_file__(extractor_file, force, 1000):
                utils.info("- Extraction: extractor '%s' already exists." %
                           extractor_file)
            else:
                utils.ensure_dir(os.path.dirname(extractor_file))
                # read training files
                if extractor.split_training_data_by_client:
                    train_files = self.m_file_selector.training_list(
                        'preprocessed',
                        'train_extractor',
                        arrange_by_client=True)
                    train_data = self.__read_data_by_client__(
                        train_files, preprocessor)
                    utils.info(
                        "- Extraction: training extractor '%s' using %d identities: "
                        % (extractor_file, len(train_files)))
                else:
                    train_files = self.m_file_selector.training_list(
                        'preprocessed', 'train_extractor')
                    train_data = self.__read_data__(train_files, preprocessor)
                    utils.info(
                        "- Extraction: training extractor '%s' using %d training files: "
                        % (extractor_file, len(train_files)))
                # train model
                extractor.train(train_data, extractor_file, train_files)
Example 19
  def gmm_estep(self, indices, force=False):
    """Performs a single E-step of the GMM training (parallel)."""
    stats_file = self.m_configuration.gmm_stats_file % (self.m_args.iteration, indices[0], indices[1])

    if  self.m_tool_chain.__check_file__(stats_file, force, 1000):
      utils.info("UBM training: Skipping GMM E-Step since the file '%s' already exists" % stats_file)
    else:
      training_list = self.m_file_selector.training_feature_list()
      machine_file = self.m_configuration.gmm_intermediate_file % self.m_args.iteration
      gmm_machine = bob.machine.GMMMachine(bob.io.HDF5File(machine_file))

      utils.info("UBM training: GMM E-Step from range(%d, %d)" % indices)

      # read data
      data = numpy.vstack([bob.io.load(str(training_list[index])) for index in range(indices[0], indices[1])])

      gmm_trainer = bob.trainer.ML_GMMTrainer(self.m_tool.m_update_means, self.m_tool.m_update_variances, self.m_tool.m_update_weights)
      gmm_trainer.responsibilities_threshold = self.m_tool.m_responsibility_threshold
      gmm_trainer.initialize(gmm_machine, data)

      # Calls the E-step and extracts the GMM statistics
      gmm_trainer.e_step(gmm_machine, data)
      gmm_stats = gmm_trainer.gmm_statistics

      # Saves the GMM statistics to the file
      utils.ensure_dir(os.path.dirname(stats_file))
      gmm_stats.save(bob.io.HDF5File(stats_file, 'w'))
      utils.info("UBM training: Wrote GMM stats '%s'" % (stats_file))
Example 20
    def train_projector(self, tool, extractor, force=False):
        """Train the feature projector with the extracted features of the world group."""
        if tool.requires_projector_training:
            projector_file = self.m_file_selector.projector_file

            if self.__check_file__(projector_file, force, 1000):
                utils.info("- Projection: projector '%s' already exists." %
                           projector_file)
            else:
                utils.ensure_dir(os.path.dirname(projector_file))
                # train projector
                if tool.split_training_features_by_client:
                    train_files = self.m_file_selector.training_list(
                        'features', 'train_projector', arrange_by_client=True)
                    train_features = self.__read_features_by_client__(
                        train_files, extractor)
                    utils.info(
                        "- Projection: training projector '%s' using %d identities: "
                        % (projector_file, len(train_files)))
                else:
                    train_files = self.m_file_selector.training_list(
                        'features', 'train_projector')
                    train_features = self.__read_features__(
                        train_files, extractor)
                    utils.info(
                        "- Projection: training projector '%s' using %d training files: "
                        % (projector_file, len(train_files)))

                # perform training
                tool.train_projector(train_features, str(projector_file))
Example 21
    def feature_normalization(self, indices, force=False):
        """Normalizes the list of features to have zero mean and unit variance (parallel)"""
        normalized_list = self.m_file_selector.training_feature_list()

        utils.info("UBM training: normalizing features from range(%d, %d)" %
                   indices)

        # iterate through the files and normalize the features
        for index in range(indices[0], indices[1]):
            feature = bob.io.load(str(training_list[index]))

            mean, std = self.m_tool.__normalize_std_array__(feature)

            if self.m_tool_chain.__check_file__(normalized_list[index], force):
                utils.debug("Skipping file '%s'" % normalized_list[index])
            else:
                utils.ensure_dir(os.path.dirname(normalized_list[index]))
                f = bob.io.HDF5File(str(normalized_list[index]), 'w')
                f.set('mean', mean)
                f.set('std', std)
                utils.debug("Saved normalized feature %s" %
                            str(normalized_list[index]))
Example 22
    def zt_norm(self, groups=['dev', 'eval']):
        """Computes ZT-Norm using the previously generated A, B, C, and D files"""
        for group in groups:
            utils.info("- Scoring: computing ZT-norm for group '%s'" % group)
            # list of models
            model_ids = self.m_file_selector.model_ids(group)
            t_model_ids = self.m_file_selector.t_model_ids(group)

            # first, normalize the C scores
            self.__scores_c_normalize__(model_ids, t_model_ids, group)
            # then normalize the D scores
            self.__scores_d_normalize__(t_model_ids, group)

            # load D matrices only once
            d = bob.io.load(self.m_file_selector.d_matrix_file(group))
            d_same_value = bob.io.load(
                self.m_file_selector.d_same_value_matrix_file(group)).astype(
                    bool)
            # Loops over the model ids
            for model_id in model_ids:
                # Loads probe files to get information about the type of access
                probe_objects = self.m_file_selector.probe_objects_for_model(
                    model_id, group)

                # Loads A, B, and C matrices for current model id
                a = bob.io.load(self.m_file_selector.a_file(model_id, group))
                b = bob.io.load(self.m_file_selector.b_file(model_id, group))
                c = bob.io.load(
                    self.m_file_selector.c_file_for_model(model_id, group))

                # compute zt scores
                zt_scores = bob.machine.ztnorm(a, b, c, d, d_same_value)

                # Saves to text file
                self.__save_scores__(
                    self.m_file_selector.zt_norm_file(model_id, group),
                    zt_scores, probe_objects,
                    self.m_file_selector.client_id(model_id))
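
The normalization itself is a single call once the four matrices are in place. A minimal sketch of `bob.machine.ztnorm` on random score matrices; the shapes assumed here follow the per-model loop above (one enrolled model against all probes):

import numpy
import bob

n_probes, n_z_probes, n_t_models = 10, 8, 5

a = numpy.random.randn(1, n_probes)             # enrolled model vs. probes
b = numpy.random.randn(1, n_z_probes)           # enrolled model vs. Z-norm probes
c = numpy.random.randn(n_t_models, n_probes)    # T-norm models vs. probes
d = numpy.random.randn(n_t_models, n_z_probes)  # T-norm models vs. Z-norm probes
d_same_value = numpy.zeros((n_t_models, n_z_probes), dtype=bool)

zt_scores = bob.machine.ztnorm(a, b, c, d, d_same_value)
print(zt_scores.shape)  # (1, n_probes)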
Example 23
    def __train_pca__(self, feature_space):
        """Generates the PCA covariance matrix"""

        # Initializes the data to apply PCA on.
        data_list = []
        for client in feature_space:
            for feature in client:
                data_list.append(feature)
        data = numpy.vstack(data_list)
        del data_list

        utils.info("  -> Training LinearMachine using PCA")

        # Training.
        t = bob.trainer.PCATrainer()
        machine, variances = t.train(data)
        del data

        # Compute variance percentage, if desired.
        if isinstance(self.m_subspace_dim, float):
            cummulated = numpy.cumsum(variances) / numpy.sum(variances)
            for index in range(len(cummulated)):
                if cummulated[index] > self.m_subspace_dim:
                    self.m_subspace_dim = index
                    break
            self.m_subspace_dim = index
            del cummulated

        utils.info("    ... Keeping %d PCA dimensions" % self.m_subspace_dim)

        # Re-shape machine.
        machine.resize(machine.shape[0], self.m_subspace_dim)
        variances.resize(self.m_subspace_dim)

        # Return machine.
        return machine, variances
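
The PCA step returns a `LinearMachine` plus the eigenvalues, which can then be truncated. A minimal sketch of that pattern on synthetic data (legacy `bob` 1.x API), choosing the number of kept components from a 95% energy threshold, analogous to the float-valued `m_subspace_dim` case above:

import numpy
import bob

data = numpy.random.randn(200, 10)  # 200 samples, 10-dimensional features

trainer = bob.trainer.PCATrainer()
machine, variances = trainer.train(data)  # LinearMachine and eigenvalues

# keep enough components to explain 95% of the variance
energy = numpy.cumsum(variances) / numpy.sum(variances)
n_kept = int(numpy.searchsorted(energy, 0.95)) + 1

machine.resize(machine.shape[0], n_kept)
variances.resize(n_kept)
print(machine.shape)  # (10, n_kept)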
Example 24
    def kmeans_estep(self, indices, force=False):
        """Performs a single E-step of the K-Means algorithm (parallel)"""
        stats_file = self.m_configuration.kmeans_stats_file % (
            self.m_args.iteration, indices[0], indices[1])

        if self.m_tool_chain.__check_file__(stats_file, force, 1000):
            utils.info(
                "UBM training: Skipping KMeans E-Step since the file '%s' already exists"
                % stats_file)
        else:
            training_list = self.m_file_selector.training_feature_list()
            machine_file = self.m_configuration.kmeans_intermediate_file % self.m_args.iteration
            kmeans_machine = bob.machine.KMeansMachine(
                bob.io.HDF5File(machine_file))

            utils.info("UBM training: KMeans E-Step from range(%d, %d)" %
                       indices)

            # read data
            data = numpy.vstack([
                bob.io.load(str(training_list[index]))
                for index in range(indices[0], indices[1])
            ])

            kmeans_trainer = bob.trainer.KMeansTrainer()
            t = bob.machine.KMeansMachine(
                self.m_tool.m_gaussians, data.shape[1]
            )  # Temporary Kmeans machine required for trainer initialization
            kmeans_trainer.initialize(t, data)

            # Performs the E-step
            kmeans_trainer.e_step(kmeans_machine, data)

            # write results to file
            dist = numpy.array([kmeans_trainer.average_min_distance])
            nsamples = numpy.array([indices[1] - indices[0]],
                                   dtype=numpy.float64)

            utils.ensure_dir(os.path.dirname(stats_file))
            f = bob.io.HDF5File(stats_file, 'w')
            f.set('zeros', kmeans_trainer.zeroeth_order_statistics)
            f.set('first', kmeans_trainer.first_order_statistics)
            f.set('dist', dist * nsamples)
            f.set('nsamples', nsamples)
            utils.info("UBM training: Wrote Stats file '%s'" % stats_file)
Example 25
    def gmm_initialize(self, force=False):
        """Initializes the GMM calculation with the result of the K-Means algorithm (non-parallel).
    This might require a lot of memory."""
        output_file = self.m_configuration.gmm_intermediate_file % 0

        if self.m_tool_chain.__check_file__(output_file, force, 800):
            utils.info(
                "UBM Training: Skipping GMM initialization since '%s' already exists"
                % output_file)
        else:
            training_list = self.m_file_selector.training_feature_list()
            utils.info("UBM Training: Initializing GMM")

            # load KMeans machine
            kmeans_machine = bob.machine.KMeansMachine(
                bob.io.HDF5File(self.m_configuration.kmeans_file))

            # read features
            data = numpy.vstack([
                bob.io.load(str(training_list[index]))
                for index in utils.quasi_random_indices(
                    len(training_list), self.m_args.limit_training_examples)
            ])

            # Create initial GMM Machine
            gmm_machine = bob.machine.GMMMachine(self.m_tool.m_gaussians,
                                                 data.shape[1])

            [
                variances, weights
            ] = kmeans_machine.get_variances_and_weights_for_each_cluster(data)

            # Initializes the GMM
            gmm_machine.means = kmeans_machine.means
            gmm_machine.variances = variances
            gmm_machine.weights = weights
            gmm_machine.set_variance_thresholds(
                self.m_tool.m_variance_threshold)

            utils.ensure_dir(os.path.dirname(output_file))
            gmm_machine.save(bob.io.HDF5File(output_file, 'w'))
            utils.info("UBM Training: Wrote GMM file '%s'" % output_file)
Example 26
  def kmeans_initialize(self, force=False):
    """Initializes the K-Means training (non-parallel)."""
    output_file = self.m_configuration.kmeans_intermediate_file % 0

    if self.m_tool_chain.__check_file__(output_file, force, 1000):
      utils.info("UBM training: Skipping KMeans initialization since the file '%s' already exists" % output_file)
    else:
      # read data
      utils.info("UBM training: initializing kmeans")
      training_list = self.m_file_selector.training_feature_list()
      data = numpy.vstack([bob.io.load(str(training_list[index])) for index in utils.quasi_random_indices(len(training_list), self.m_args.limit_training_examples)])

      # Perform KMeans initialization
      kmeans_machine = bob.machine.KMeansMachine(self.m_tool.m_gaussians, data.shape[1])
      # Creates the KMeansTrainer and call the initialization procedure
      kmeans_trainer = bob.trainer.KMeansTrainer()
      kmeans_trainer.initialize(kmeans_machine, data)
      utils.ensure_dir(os.path.dirname(output_file))
      kmeans_machine.save(bob.io.HDF5File(output_file, 'w'))
      utils.info("UBM training: saved initial KMeans machine to '%s'" % output_file)
Example 27
    def gmm_estep(self, indices, force=False):
        """Performs a single E-step of the GMM training (parallel)."""
        stats_file = self.m_configuration.gmm_stats_file % (
            self.m_args.iteration, indices[0], indices[1])

        if self.m_tool_chain.__check_file__(stats_file, force, 1000):
            utils.info(
                "UBM training: Skipping GMM E-Step since the file '%s' already exists"
                % stats_file)
        else:
            training_list = self.m_file_selector.training_feature_list()
            machine_file = self.m_configuration.gmm_intermediate_file % self.m_args.iteration
            gmm_machine = bob.machine.GMMMachine(bob.io.HDF5File(machine_file))

            utils.info("UBM training: GMM E-Step from range(%d, %d)" % indices)

            # read data
            data = numpy.vstack([
                bob.io.load(str(training_list[index]))
                for index in range(indices[0], indices[1])
            ])

            gmm_trainer = bob.trainer.ML_GMMTrainer(
                self.m_tool.m_update_means, self.m_tool.m_update_variances,
                self.m_tool.m_update_weights)
            gmm_trainer.responsibilities_threshold = self.m_tool.m_responsibility_threshold
            gmm_trainer.initialize(gmm_machine, data)

            # Calls the E-step and extracts the GMM statistics
            gmm_trainer.e_step(gmm_machine, data)
            gmm_stats = gmm_trainer.gmm_statistics

            # Saves the GMM statistics to the file
            utils.ensure_dir(os.path.dirname(stats_file))
            gmm_stats.save(bob.io.HDF5File(stats_file, 'w'))
            utils.info("UBM training: Wrote GMM stats '%s'" % (stats_file))
Example 28
def face_verify(args,
                command_line_parameters,
                external_dependencies=[],
                external_fake_job_id=0):
    """This is the main entry point for computing face verification experiments.
  You just have to specify configurations for any of the steps of the toolchain, which are:
  -- the database
  -- the preprocessing
  -- feature extraction
  -- the recognition tool
  -- and the grid configuration (in case the function should be executed in the grid).
  Additionally, you can skip parts of the toolchain by selecting proper --skip-... parameters.
  If your probe files are not too big, you can also specify the --preload-probes switch to speed up the score computation.
  If files should be re-generated, please specify the --force option (might be combined with the --skip-... options)."""

    # generate tool chain executor
    executor = ToolChainExecutorZT(args)
    # as the main entry point, check whether the grid option was given
    if not args.grid:
        if args.timer is not None and not len(args.timer):
            args.timer = ('real', 'system', 'user')
        # not in a grid, use default tool chain sequentially
        if args.timer:
            utils.info("- Timer: Starting timer")
            start_time = os.times()

        executor.write_info(command_line_parameters)

        executor.execute_tool_chain()

        if args.timer:
            end_time = os.times()
            utils.info("- Timer: Stopped timer")

            for t in args.timer:
                index = {'real': 4, 'system': 1, 'user': 0}[t]
                print "Elapsed", t, "time:", end_time[index] - start_time[
                    index], "seconds"

        return {}

    elif args.sub_task:
        # execute the desired sub-task
        executor.execute_grid_job()
        return {}
    else:
        # no other parameter given, so deploy new jobs

        # get the name of this file
        this_file = __file__
        if this_file[-1] == 'c':
            this_file = this_file[0:-1]

        executor.write_info(command_line_parameters)

        # initialize the executor to submit the jobs to the grid
        executor.set_common_parameters(calling_file=this_file,
                                       parameters=command_line_parameters,
                                       fake_job_id=external_fake_job_id)

        # add the jobs
        job_ids = executor.add_jobs_to_grid(external_dependencies)

        if executor.m_grid.is_local():
            # start the local jman daemon
            executor.execute_local_deamon()
            return {}

        else:
            return job_ids
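
The timing block above relies on `os.times()`, where indices 0, 1 and 4 of the returned tuple are the user, system and real (wall-clock) times, respectively. A small self-contained sketch of the same pattern:

import os
import time

start_time = os.times()
time.sleep(0.1)  # stand-in for executor.execute_tool_chain()
end_time = os.times()

for t in ('real', 'system', 'user'):
    index = {'real': 4, 'system': 1, 'user': 0}[t]
    print("Elapsed %s time: %f seconds" % (t, end_time[index] - start_time[index]))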
Example 29
    def compute_scores(self,
                       tool,
                       compute_zt_norm,
                       force=False,
                       indices=None,
                       groups=['dev', 'eval'],
                       types=['A', 'B', 'C', 'D'],
                       preload_probes=False):
        """Computes the scores for the given groups (by default 'dev' and 'eval')."""
        # save tool for internal use
        self.m_tool = tool
        self.m_use_projected_dir = hasattr(tool, 'project')

        # load the projector and the enroller, if needed
        tool.load_projector(self.m_file_selector.projector_file)
        tool.load_enroller(self.m_file_selector.enroller_file)

        for group in groups:
            # get model ids
            model_ids = self.m_file_selector.model_ids(group)
            if compute_zt_norm:
                t_model_ids = self.m_file_selector.t_model_ids(group)

            # compute A scores
            if 'A' in types:
                if indices is not None:
                    model_ids_short = model_ids[indices[0]:indices[1]]
                    utils.info("- Scoring: splitting of index range %s" %
                               str(indices))
                else:
                    model_ids_short = model_ids
                # we need to time this.
                timer = ('real', 'system', 'user')
                utils.info("-Timer: Starting Timer")
                start_time = os.times()
                # create inverted index file if requested by the algorithm
                if self.m_tool.requires_inverted_indexing:
                    self.__scores_inverted___(model_ids_short, group,
                                              compute_zt_norm, force,
                                              preload_probes)
                else:
                    self.__scores_a__(model_ids_short, group, compute_zt_norm,
                                      force, preload_probes)
                end_time = os.times()
                utils.info("-Timer: Stopped Timer")
                for t in timer:
                    index = {'real': 4, 'system': 1, 'user': 0}[t]
                    print "Elapsed", t, "time:", end_time[index] - start_time[
                        index], "seconds"

            if compute_zt_norm:
                # compute B scores
                if 'B' in types:
                    if indices is not None:
                        model_ids_short = model_ids[indices[0]:indices[1]]
                        utils.info("- Scoring: splitting of index range %s" %
                                   str(indices))
                    else:
                        model_ids_short = model_ids
                    self.__scores_b__(model_ids_short, group, force,
                                      preload_probes)

                # compute C scores
                if 'C' in types:
                    if indices is not None:
                        t_model_ids_short = t_model_ids[indices[0]:indices[1]]
                        utils.info("- Scoring: splitting of index range %s" %
                                   str(indices))
                    else:
                        t_model_ids_short = t_model_ids
                    self.__scores_c__(t_model_ids_short, group, force,
                                      preload_probes)

                # compute D scores
                if 'D' in types:
                    if indices is not None:
                        t_model_ids_short = t_model_ids[indices[0]:indices[1]]
                        utils.info("- Scoring: splitting of index range %s" %
                                   str(indices))
                    else:
                        t_model_ids_short = t_model_ids
                    self.__scores_d__(t_model_ids_short, group, force,
                                      preload_probes)
Example 30
    def kmeans_mstep(self, counts, force=False):
        """Performs a single M-step of the K-Means algorithm (non-parallel)"""
        old_machine_file = self.m_configuration.kmeans_intermediate_file % self.m_args.iteration
        new_machine_file = self.m_configuration.kmeans_intermediate_file % (
            self.m_args.iteration + 1)

        if self.m_tool_chain.__check_file__(new_machine_file, force, 1000):
            utils.info(
                "UBM training: Skipping KMeans M-Step since the file '%s' already exists"
                % new_machine_file)
        else:
            # get the files from e-step
            training_list = self.m_file_selector.training_feature_list()

            # check whether there is one file containing all data
            if os.path.exists(self.m_configuration.kmeans_stats_file %
                              (self.m_args.iteration, 0, len(training_list))):
                stats_file = self.m_configuration.kmeans_stats_file % (
                    self.m_args.iteration, 0, len(training_list))
                # load stats file
                zeroeth, first, nsamples, dist = self.read_stats(stats_file)
            else:
                # load several files
                job_ids = range(
                    self.__generate_job_array__(training_list, counts)[1])
                job_indices = [(counts * job_id,
                                min(counts * (job_id + 1), len(training_list)))
                               for job_id in job_ids]
                stats_files = [
                    self.m_configuration.kmeans_stats_file %
                    (self.m_args.iteration, indices[0], indices[1])
                    for indices in job_indices
                ]

                # read all stats files
                zeroeth, first, nsamples, dist = self.read_stats(
                    stats_files[0])
                for stats_file in stats_files[1:]:
                    zeroeth_, first_, nsamples_, dist_ = self.read_stats(
                        stats_file)
                    zeroeth += zeroeth_
                    first += first_
                    nsamples += nsamples_
                    dist += dist_

            # read one feature file (the trainer API needs data, but its content is not used in the M-step)
            data = numpy.array(bob.io.load(str(training_list[0])))

            # Creates the KMeansTrainer
            kmeans_trainer = bob.trainer.KMeansTrainer()
            # Creates the KMeansMachine
            kmeans_machine = bob.machine.KMeansMachine(
                bob.io.HDF5File(old_machine_file))
            kmeans_trainer.initialize(kmeans_machine, data)

            kmeans_trainer.zeroeth_order_statistics = zeroeth
            kmeans_trainer.first_order_statistics = first
            kmeans_trainer.average_min_distance = dist

            # Performs the M-step
            kmeans_trainer.m_step(kmeans_machine,
                                  data)  # data is not used in M-step
            utils.info("UBM training: Performed M step %d with result %f" %
                       (self.m_args.iteration, dist / nsamples))

            # Save the K-Means model
            utils.ensure_dir(os.path.dirname(new_machine_file))
            kmeans_machine.save(bob.io.HDF5File(new_machine_file, 'w'))
            shutil.copy(new_machine_file, self.m_configuration.kmeans_file)
            utils.info("UBM training: Wrote new KMeans machine '%s'" %
                       new_machine_file)

        if self.m_args.clean_intermediate and self.m_args.iteration > 0:
            old_file = self.m_configuration.kmeans_intermediate_file % (
                self.m_args.iteration - 1)
            utils.info("Removing old intermediate directory '%s'" %
                       os.path.dirname(old_file))
            shutil.rmtree(os.path.dirname(old_file))
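
For reference, the statistics accumulated above admit a closed-form mean update: each new cluster mean is the per-cluster sum of assigned feature vectors (first order) divided by the number of assigned samples (zeroth order). A minimal NumPy sketch of what the means part of kmeans_trainer.m_step is expected to compute (illustration only; the real update happens inside bob.trainer.KMeansTrainer):

import numpy

def kmeans_means_update_sketch(zeroeth, first):
    """Compute new K-Means means from accumulated statistics.

    zeroeth: (n_clusters,) number of samples assigned to each cluster
    first:   (n_clusters, dim) per-cluster sums of the assigned samples
    """
    counts = numpy.maximum(zeroeth, 1.0)[:, None]  # guard against empty clusters
    return first / counts
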
Exemplo n.º 31
0
def main(command_line_parameters=None):
  """Reads score files, computes error measures and plots curves."""

  args = command_line_arguments(command_line_parameters)
  #print args
  # get some colors for plotting
  cmap = mpl.cm.get_cmap(name='hsv')
  colors = [cmap(i) for i in numpy.linspace(0, 1.0, len(args.dev_files)+1)]

  score_parser = {'4column' : bob.measure.load.split_four_column, '5column' : bob.measure.load.split_five_column}[args.parser]
  ids_parser = {'4column' : bob.measure.load.four_column, '5column' : bob.measure.load.five_column}[args.parser]

  # First, read the score files
  utils.info("Loading %d score files of the development set" % len(args.dev_files))
  scores_dev = [score_parser(os.path.join(args.directory, f)) for f in args.dev_files]
  ids_dev = [ids_parser(os.path.join(args.directory, f)) for f in args.dev_files]
  id_dev = []
  for i in ids_dev[0]:
    if i[0] == i[1]:
      id_dev.append(i + (1,))
    else:
      id_dev.append(i + (0,))

  ids_dev = numpy.array(id_dev)
  if (args.norm == 'norm'):
    ids_dev[:,3]=ids_dev[:,3].astype(numpy.float64)/max(ids_dev[:,3].astype(numpy.float64))

  if args.eval_files:
    utils.info("Loading %d score files of the evaluation set" % len(args.eval_files))
    scores_eval = [score_parser(os.path.join(args.directory, f)) for f in args.eval_files]
    ids_eval = [ids_parser(os.path.join(args.directory, f)) for f in args.eval_files]
    id_eval = []
    for i in ids_eval[0]:
      if i[0] == i[1]:
        id_eval.append(i + (1,))
      else:
        id_eval.append(i + (0,))

    ids_eval = numpy.array(id_eval)
    if (args.norm == 'norm'):
      ids_eval[:,3]=ids_eval[:,3].astype(numpy.float64)/max(ids_eval[:,3].astype(numpy.float64))

  if args.criterion:
    utils.info("Computing %s on the development " % args.criterion + ("and HTER on the evaluation set" if args.eval_files else "set"))
   	
    pdf = PdfPages(args.pdf)
    for i in range(len(scores_dev)):
      totalModels = numpy.unique(ids_dev[:,1])

      eer_mean, figure = _plot_scores((25,10), args, totalModels, ids_dev, 'development', "Fauna graph for development set")
      pdf.savefig(figure)
    
      # Plot the EER per model
      bob.io.base.save(eer_mean,'eer_per_model.mat')
      
      pdf.savefig(_plot_eer((25,10), eer_mean, "EER per model curve for development set"))

      # Plot the scores histogram
      scoresTarget = ids_dev[ids_dev[:,4]=='1',3].astype(numpy.float64)
      scoresNonTarget = ids_dev[ids_dev[:,4]=='0',3].astype(numpy.float64)    
      pdf.savefig(_plot_scores_hist((25,10), scoresTarget, scoresNonTarget, "Scores histogram for development set"))

    if args.eval_files:
      for i in range(len(scores_eval)):
        totalModels = numpy.unique(ids_eval[:,1])

        eer_mean, figure = _plot_scores((25,10), args, totalModels, ids_eval, 'evaluation', "Fauna graph for evaluation set")
        pdf.savefig(figure)
      
        # Plot the EER per model
        pdf.savefig(_plot_eer((25,10), eer_mean, "EER per model curve for evaluation set"))

        # Plot the scores histogram
        scoresTarget = ids_eval[ids_eval[:,4]=='1',3].astype(numpy.float64)
        scoresNonTarget = ids_eval[ids_eval[:,4]=='0',3].astype(numpy.float64)    
        pdf.savefig(_plot_scores_hist((25,10), scoresTarget, scoresNonTarget, "Scores histogram for evaluation set"))

    pdf.close()
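
The target/non-target labelling above (i[0] == i[1]) relies on the 4-column score file layout read by bob.measure.load.four_column: each row is (claimed client id, real client id, probe label, score); the 5-column variant adds a model label. A small sketch of that convention (purely illustrative, not a replacement for the bob.measure loaders):

def label_score_row(row):
    """Tag a 4-column score row as target (1) or non-target (0)."""
    claimed_id, real_id, probe_label, score = row
    return row + ((1,) if claimed_id == real_id else (0,))

# e.g. label_score_row(('client1', 'client1', 'probe_01', 1.73)) -> ('client1', 'client1', 'probe_01', 1.73, 1)
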
Exemplo n.º 32
0
def main(command_line_parameters=None):
    """Reads score files, computes error measures and plots curves."""

    args = command_line_arguments(command_line_parameters)

    # get some colors for plotting
    cmap = mpl.cm.get_cmap(name='hsv')
    colors = [cmap(i) for i in numpy.linspace(0, 1.0, len(args.dev_files) + 1)]

    if args.criterion or args.roc or args.det or args.cllr or args.mindcf:
        score_parser = {
            '4column': bob.measure.load.split_four_column,
            '5column': bob.measure.load.split_five_column
        }[args.parser]

        # First, read the score files
        utils.info("Loading %d score files of the development set" %
                   len(args.dev_files))
        scores_dev = [
            score_parser(os.path.join(args.directory, f))
            for f in args.dev_files
        ]

        if args.eval_files:
            utils.info("Loading %d score files of the evaluation set" %
                       len(args.eval_files))
            scores_eval = [
                score_parser(os.path.join(args.directory, f))
                for f in args.eval_files
            ]

        if args.criterion:
            utils.info("Computing %s on the development " % args.criterion +
                       ("and HTER on the evaluation set" if args.
                        eval_files else "set"))
            for i in range(len(scores_dev)):
                # compute threshold on development set
                threshold = {
                    'EER': bob.measure.eer_threshold,
                    'HTER': bob.measure.min_hter_threshold
                }[args.criterion](scores_dev[i][0], scores_dev[i][1])
                # apply threshold to development set
                far, frr = bob.measure.farfrr(scores_dev[i][0],
                                              scores_dev[i][1], threshold)
                print("The %s of the development set of '%s' is %2.3f%%" %
                      (args.criterion,
                       args.legends[i] if args.legends else args.dev_files[i],
                       (far + frr) * 50.))  # / 2 * 100%
                if args.eval_files:
                    # apply threshold to evaluation set
                    far, frr = bob.measure.farfrr(scores_eval[i][0],
                                                  scores_eval[i][1], threshold)
                    print("The HTER of the evaluation set of '%s' is %2.3f%%" %
                          (args.legends[i] if args.legends else
                           args.dev_files[i], (far + frr) * 50.))  # / 2 * 100%

        if args.mindcf:
            utils.info("Computing minDCF on the development " + (
                "and on the evaluation set" if args.eval_files else "set"))
            for i in range(len(scores_dev)):
                # compute threshold on development set
                threshold = bob.measure.min_weighted_error_rate_threshold(
                    scores_dev[i][0], scores_dev[i][1], args.cost)
                # apply threshold to development set
                far, frr = bob.measure.farfrr(scores_dev[i][0],
                                              scores_dev[i][1], threshold)
                print("The minDCF of the development set of '%s' is %2.3f%%" %
                      (args.legends[i] if args.legends else args.dev_files[i],
                       (args.cost * far + (1 - args.cost) * frr) * 100.))
                if args.eval_files:
                    # compute threshold on evaluation set
                    threshold = bob.measure.min_weighted_error_rate_threshold(
                        scores_eval[i][0], scores_eval[i][1], args.cost)
                    # apply threshold to evaluation set
                    far, frr = bob.measure.farfrr(scores_eval[i][0],
                                                  scores_eval[i][1], threshold)
                    print(
                        "The minDCF of the evaluation set of '%s' is %2.3f%%" %
                        (args.legends[i]
                         if args.legends else args.eval_files[i],
                         (args.cost * far + (1 - args.cost) * frr) * 100.))

        if args.cllr:
            utils.info("Computing Cllr and minCllr on the development " + (
                "and on the evaluation set" if args.eval_files else "set"))
            for i in range(len(scores_dev)):
                cllr = bob.measure.calibration.cllr(scores_dev[i][0],
                                                    scores_dev[i][1])
                min_cllr = bob.measure.calibration.min_cllr(
                    scores_dev[i][0], scores_dev[i][1])
                print(
                    "Calibration performance on development set of '%s' is Cllr %1.5f and minCllr %1.5f "
                    % (args.legends[i], cllr, min_cllr))
                if args.eval_files:
                    cllr = bob.measure.calibration.cllr(
                        scores_eval[i][0], scores_eval[i][1])
                    min_cllr = bob.measure.calibration.min_cllr(
                        scores_eval[i][0], scores_eval[i][1])
                    print(
                        "Calibration performance on evaluation set of '%s' is Cllr %1.5f and minCllr %1.5f"
                        % (args.legends[i], cllr, min_cllr))

        if args.roc:
            utils.info("Computing CAR curves on the development " + (
                "and on the evaluation set" if args.eval_files else "set"))
            fars = [math.pow(10., i * 0.25) for i in range(-16, 0)] + [1.]
            frrs_dev = [
                bob.measure.roc_for_far(scores[0], scores[1], fars)
                for scores in scores_dev
            ]
            if args.eval_files:
                frrs_eval = [
                    bob.measure.roc_for_far(scores[0], scores[1], fars)
                    for scores in scores_eval
                ]

            utils.info("Plotting ROC curves to file '%s'" % args.roc)
            # create a multi-page PDF for the ROC curve
            pdf = PdfPages(args.roc)
            # create a separate figure for dev and eval
            pdf.savefig(
                _plot_roc(frrs_dev, colors,
                          args.legends if args.legends else args.dev_files,
                          "ROC curve for development set"))
            del frrs_dev
            if args.eval_files:
                pdf.savefig(
                    _plot_roc(
                        frrs_eval, colors,
                        args.legends if args.legends else args.eval_files,
                        "ROC curve for evaluation set"))
                del frrs_eval
            pdf.close()

        if args.det:
            utils.info("Computing DET curves on the development " + (
                "and on the evaluation set" if args.eval_files else "set"))
            dets_dev = [
                bob.measure.det(scores[0], scores[1], 1000)
                for scores in scores_dev
            ]
            if args.eval_files:
                dets_eval = [
                    bob.measure.det(scores[0], scores[1], 1000)
                    for scores in scores_eval
                ]

            utils.info("Plotting DET curves to file '%s'" % args.det)
            # create a multi-page PDF for the DET plots
            pdf = PdfPages(args.det)
            # create a separate figure for dev and eval
            pdf.savefig(
                _plot_det(dets_dev, colors,
                          args.legends if args.legends else args.dev_files,
                          "DET plot for development set"))
            del dets_dev
            if args.eval_files:
                pdf.savefig(
                    _plot_det(
                        dets_eval, colors,
                        args.legends if args.legends else args.eval_files,
                        "DET plot for evaluation set"))
                del dets_eval
            pdf.close()

    if args.cmc:
        utils.info("Computing CMC curves on the development " +
                   ("and on the evaluation set" if args.eval_files else "set"))
        cmc_parser = {
            '4column': bob.measure.load.cmc_four_column,
            '5column': bob.measure.load.cmc_five_column
        }[args.parser]
        cmcs_dev = [
            cmc_parser(os.path.join(args.directory, f)) for f in args.dev_files
        ]
        if args.eval_files:
            cmcs_eval = [
                cmc_parser(os.path.join(args.directory, f))
                for f in args.eval_files
            ]

        utils.info("Plotting CMC curves to file '%s'" % args.cmc)
        # create a multi-page PDF for the CMC curves
        pdf = PdfPages(args.cmc)
        # create a separate figure for dev and eval
        pdf.savefig(
            _plot_cmc(cmcs_dev, colors,
                      args.legends if args.legends else args.dev_files,
                      "CMC curve for development set"))
        if args.eval_files:
            pdf.savefig(
                _plot_cmc(cmcs_eval, colors,
                          args.legends if args.legends else args.eval_files,
                          "CMC curve for evaluation set"))
        pdf.close()
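
A brief note on the (far + frr) * 50. expressions above: the half total error rate is HTER = (FAR + FRR) / 2, and converting it to a percentage multiplies by 100, which collapses into a single factor of 50. A tiny worked check:

far, frr = 0.04, 0.10              # 4% false accepts, 10% false rejects (made-up numbers)
hter_percent = (far + frr) * 50.   # same as ((far + frr) / 2) * 100
assert abs(hter_percent - 7.0) < 1e-9
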
Exemplo n.º 33
0
    def enroll_models(self,
                      tool,
                      extractor,
                      compute_zt_norm,
                      indices=None,
                      groups=['dev', 'eval'],
                      types=['N', 'T'],
                      force=False):
        """Enroll the models for 'dev' and 'eval' groups, for both models and T-Norm-models.
       This function uses the extracted or projected features to compute the models,
       depending on your setup of the base class Tool."""

        # read the projector file, if needed
        tool.load_projector(self.m_file_selector.projector_file)
        # read the model enrollment file
        tool.load_enroller(self.m_file_selector.enroller_file)

        # which tool to use to read the features...
        reader = tool if tool.use_projected_features_for_enrollment else extractor

        # Create Models
        if 'N' in types:
            for group in groups:
                model_ids = self.m_file_selector.model_ids(group)

                if indices is not None:
                    model_ids = model_ids[indices[0]:indices[1]]
                    utils.info("- Enrollment: splitting of index range %s" %
                               str(indices))

                utils.info("- Enrollment: enrolling models of group '%s'" %
                           group)
                for model_id in model_ids:
                    # Path to the model
                    model_file = self.m_file_selector.model_file(
                        model_id, group)

                    # skip if the model file already exists (with force, the old file is removed and re-created)
                    if not self.__check_file__(model_file, force):
                        enroll_files = self.m_file_selector.enroll_files(
                            model_id, group, 'projected'
                            if tool.use_projected_features_for_enrollment else
                            'features')

                        # load all files into memory
                        enroll_features = [
                            reader.read_feature(str(enroll_file))
                            for enroll_file in enroll_files
                        ]

                        model = tool.enroll(enroll_features)
                        # save the model
                        utils.ensure_dir(os.path.dirname(model_file))
                        tool.save_model(model, str(model_file))

        # T-Norm-Models
        if 'T' in types and compute_zt_norm:
            for group in groups:
                t_model_ids = self.m_file_selector.t_model_ids(group)

                if indices is not None:
                    t_model_ids = t_model_ids[indices[0]:indices[1]]
                    utils.info("- Enrollment: splitting of index range %s" %
                               str(indices))

                utils.info("- Enrollment: enrolling T-models of group '%s'" %
                           group)
                for t_model_id in t_model_ids:
                    # Path to the model
                    t_model_file = self.m_file_selector.t_model_file(
                        t_model_id, group)

                    # skip if the T-model file already exists (with force, the old file is removed and re-created)
                    if not self.__check_file__(t_model_file, force):
                        t_enroll_files = self.m_file_selector.t_enroll_files(
                            t_model_id, group, 'projected'
                            if tool.use_projected_features_for_enrollment else
                            'features')

                        # load all files into memory
                        t_enroll_features = [
                            reader.read_feature(str(t_enroll_file))
                            for t_enroll_file in t_enroll_files
                        ]

                        t_model = tool.enroll(t_enroll_features)
                        # save model
                        utils.ensure_dir(os.path.dirname(t_model_file))
                        tool.save_model(t_model, str(t_model_file))
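
To make the tool.enroll() calls above concrete: a minimal, hypothetical tool could build a model by simply averaging the enrollment features. Real tools (GMM, ISV, PLDA, ...) do considerably more, so this only sketches the expected call shape, not any actual implementation in this toolkit:

import numpy

class AveragingToolSketch(object):
    """Hypothetical enrollment tool: the 'model' is the mean of the enrollment features."""
    use_projected_features_for_enrollment = False

    def enroll(self, enroll_features):
        # each entry is one feature matrix loaded from an enrollment file
        return numpy.vstack(enroll_features).mean(axis=0)
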
Exemplo n.º 34
0
    def average_results(self):
        """Iterates over all the folds of the current view and computes the average result"""
        utils.info(" - Scoring: Averaging results of views %s" %
                   self.m_args.views)
        if not self.m_args.dry_run:
            file = open(self.m_configuration.result_file, 'w')
        if 'view1' in self.m_args.views:
            if self.m_args.dry_run:
                print "Would have averaged the results from view1 ..."
            else:
                # process the single result of view 1

                # HACK... Overwrite the score directory of the file selector to get the right result file
                self.m_file_selector.score_directories = (
                    self.__scores_directory__('view1'), )
                res_file = self.m_file_selector.no_norm_result_file('dev')

                negatives, positives = bob.measure.load.split_four_column(
                    res_file)
                threshold = bob.measure.eer_threshold(negatives, positives)

                far, frr = bob.measure.farfrr(negatives, positives, threshold)
                hter = (far + frr) / 2.0

                file.write(
                    "On view1 (dev set only):\n\nFAR = %.3f;\tFRR = %.3f;\tHTER = %.3f;\tthreshold = %.3f\n"
                    % (far, frr, hter, threshold))
                file.write("Classification success: %.2f%%\n\n" %
                           (self.__classification_result__(
                               negatives, positives, threshold) * 100.))

        if 'view2' in self.m_args.views:
            if self.m_args.dry_run:
                print "Would have averaged the results from view2 ..."
            else:
                file.write("On view2 (eval set only):\n\n")
                # iterate over all folds of view 2
                errors = numpy.ndarray((10, ), numpy.float64)
                for f in range(1, 11):
                    # HACK... Overwrite the score directory of the file selector to get the right result file
                    self.m_file_selector.score_directories = (
                        self.__scores_directory__('fold%d' % f), )
                    dev_res_file = self.m_file_selector.no_norm_result_file(
                        'dev')
                    eval_res_file = self.m_file_selector.no_norm_result_file(
                        'eval')

                    # compute threshold on dev data
                    dev_negatives, dev_positives = bob.measure.load.split_four_column(
                        dev_res_file)
                    threshold = bob.measure.eer_threshold(
                        dev_negatives, dev_positives)

                    # compute FAR and FRR for eval data
                    eval_negatives, eval_positives = bob.measure.load.split_four_column(
                        eval_res_file)

                    far, frr = bob.measure.farfrr(eval_negatives,
                                                  eval_positives, threshold)
                    hter = (far + frr) / 2.0

                    file.write(
                        "On fold%d:\n\nFAR = %.3f;\tFRR = %.3f;\tHTER = %.3f;\tthreshold = %.3f\n"
                        % (f, far, frr, hter, threshold))
                    result = self.__classification_result__(
                        eval_negatives, eval_positives, threshold)
                    file.write("Classification success: %.2f%%\n\n" %
                               (result * 100.))
                    errors[f - 1] = result

                # compute mean and standard deviation of the classification success over the folds
                mean = numpy.mean(errors)
                std = numpy.std(errors)
                file.write(
                    "\nOverall classification success: %f (with standard deviation %f)\n"
                    % (mean, std))
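
The __classification_result__ helper used above is not part of this excerpt; a plausible reading of a "classification success" measure is the fraction of scores that fall on the correct side of the threshold. A hedged sketch under that assumption (not the helper's actual code):

def classification_success_sketch(negatives, positives, threshold):
    """Fraction of correctly classified scores at the given threshold."""
    correct_neg = sum(1 for s in negatives if s < threshold)
    correct_pos = sum(1 for s in positives if s >= threshold)
    return float(correct_neg + correct_pos) / (len(negatives) + len(positives))
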
Exemplo n.º 35
0
    def __scores_a__(self, model_ids, group, compute_zt_norm, force,
                     preload_probes):
        """Computes A scores. For non-ZT-norm, these are the only scores that are actually computed."""
        # preload the probe files for faster access (and lower network load)
        if preload_probes:
            utils.info("- Scoring: preloading probe files of group '%s'" %
                       group)
            all_probe_objects = self.m_file_selector.probe_objects(group)
            all_probe_files = self.m_file_selector.get_paths(
                self.m_file_selector.probe_objects(group),
                'projected' if self.m_use_projected_dir else 'features')
            # read all probe files into memory
            if self.m_file_selector.uses_probe_file_sets():
                all_preloaded_probes = [[
                    self.m_tool.read_probe(str(probe_file))
                    for probe_file in file_set
                ] for file_set in all_probe_files]
            else:
                all_preloaded_probes = [
                    self.m_tool.read_probe(str(probe_file))
                    for probe_file in all_probe_files
                ]

        if compute_zt_norm:
            utils.info("- Scoring: computing score matrix A for group '%s'" %
                       group)
        else:
            utils.info("- Scoring: computing scores for group '%s'" % group)

        # Computes the raw scores for each model
        for model_id in model_ids:
            # test if the file is already there
            score_file = self.m_file_selector.a_file(
                model_id, group
            ) if compute_zt_norm else self.m_file_selector.no_norm_file(
                model_id, group)
            if self.__check_file__(score_file, force):
                utils.warn("score file '%s' already exists." % (score_file))
            else:
                # get the probe split
                current_probe_objects = self.m_file_selector.probe_objects_for_model(
                    model_id, group)
                model = self.m_tool.read_model(
                    self.m_file_selector.model_file(model_id, group))
                if preload_probes:
                    # select the probe files for this model from all probes
                    current_preloaded_probes = self.__probe_split__(
                        current_probe_objects, all_probe_objects,
                        all_preloaded_probes)
                    # compute A matrix
                    a = self.__scores_preloaded__(model,
                                                  current_preloaded_probes)
                else:
                    current_probe_files = self.m_file_selector.get_paths(
                        current_probe_objects, 'projected'
                        if self.m_use_projected_dir else 'features')
                    a = self.__scores__(model, current_probe_files)

                if compute_zt_norm:
                    # only write the A matrix when ZT-norm will be computed afterwards
                    bob.io.save(a,
                                self.m_file_selector.a_file(model_id, group))

                # Save scores to text file
                self.__save_scores__(
                    self.m_file_selector.no_norm_file(model_id, group), a,
                    current_probe_objects,
                    self.m_file_selector.client_id(model_id))
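
The __scores__ and __scores_preloaded__ helpers are not shown in this excerpt; conceptually, each row of the A matrix holds one model's scores against all of its probes. A hedged sketch of building such a row, assuming the tool exposes read_probe (used above) and a score(model, probe) method:

import numpy

def score_row_sketch(tool, model, probe_files):
    """Illustration of one A-matrix row (not the toolchain's actual helper)."""
    row = numpy.ndarray((1, len(probe_files)), numpy.float64)
    for p, probe_file in enumerate(probe_files):
        probe = tool.read_probe(str(probe_file))
        row[0, p] = tool.score(model, probe)
    return row
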
Exemplo n.º 36
0
def main(command_line_parameters=None):
  """Reads score files, computes error measures and plots curves."""

  args = command_line_arguments(command_line_parameters)

  # get some colors for plotting
  cmap = mpl.cm.get_cmap(name='hsv')
  colors = [cmap(i) for i in numpy.linspace(0, 1.0, len(args.dev_files)+1)]

  if args.criterion or args.roc or args.det or args.cllr or args.mindcf:
    score_parser = {'4column' : bob.measure.load.split_four_column, '5column' : bob.measure.load.split_five_column}[args.parser]

    # First, read the score files
    utils.info("Loading %d score files of the development set" % len(args.dev_files))
    scores_dev = [score_parser(os.path.join(args.directory, f)) for f in args.dev_files]

    if args.eval_files:
      utils.info("Loading %d score files of the evaluation set" % len(args.eval_files))
      scores_eval = [score_parser(os.path.join(args.directory, f)) for f in args.eval_files]


    if args.criterion:
      utils.info("Computing %s on the development " % args.criterion + ("and HTER on the evaluation set" if args.eval_files else "set"))
      for i in range(len(scores_dev)):
        # compute threshold on development set
        threshold = {'EER': bob.measure.eer_threshold, 'HTER' : bob.measure.min_hter_threshold} [args.criterion](scores_dev[i][0], scores_dev[i][1])
        # apply threshold to development set
        far, frr = bob.measure.farfrr(scores_dev[i][0], scores_dev[i][1], threshold)
        print("The %s of the development set of '%s' is %2.3f%%" % (args.criterion, args.legends[i] if args.legends else args.dev_files[i], (far + frr) * 50.)) # / 2 * 100%
        if args.eval_files:
          # apply threshold to evaluation set
          far, frr = bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold)
          print("The HTER of the evaluation set of '%s' is %2.3f%%" % (args.legends[i] if args.legends else args.dev_files[i], (far + frr) * 50.)) # / 2 * 100%


    if args.mindcf:
      utils.info("Computing minDCF on the development " + ("and on the evaluation set" if args.eval_files else "set"))
      for i in range(len(scores_dev)):
        # compute threshold on development set
        threshold = bob.measure.min_weighted_error_rate_threshold(scores_dev[i][0], scores_dev[i][1], args.cost)
        # apply threshold to development set
        far, frr = bob.measure.farfrr(scores_dev[i][0], scores_dev[i][1], threshold)
        print("The minDCF of the development set of '%s' is %2.3f%%" % (args.legends[i] if args.legends else args.dev_files[i], (args.cost * far + (1-args.cost) * frr) * 100. ))
        if args.eval_files:
          # compute threshold on evaluation set
          threshold = bob.measure.min_weighted_error_rate_threshold(scores_eval[i][0], scores_eval[i][1], args.cost)
          # apply threshold to evaluation set
          far, frr = bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold)
          print("The minDCF of the evaluation set of '%s' is %2.3f%%" % (args.legends[i] if args.legends else args.eval_files[i], (args.cost * far + (1-args.cost) * frr) * 100. ))
          
      
    if args.cllr:
      utils.info("Computing Cllr and minCllr on the development " + ("and on the evaluation set" if args.eval_files else "set"))
      for i in range(len(scores_dev)):
        cllr = bob.measure.calibration.cllr(scores_dev[i][0], scores_dev[i][1])
        min_cllr = bob.measure.calibration.min_cllr(scores_dev[i][0], scores_dev[i][1])
        print("Calibration performance on development set of '%s' is Cllr %1.5f and minCllr %1.5f " % (args.legends[i], cllr, min_cllr))
        if args.eval_files:
          cllr = bob.measure.calibration.cllr(scores_eval[i][0], scores_eval[i][1])
          min_cllr = bob.measure.calibration.min_cllr(scores_eval[i][0], scores_eval[i][1])
          print("Calibration performance on evaluation set of '%s' is Cllr %1.5f and minCllr %1.5f" % (args.legends[i], cllr, min_cllr))


    if args.roc:
      utils.info("Computing CAR curves on the development " + ("and on the evaluation set" if args.eval_files else "set"))
      fars = [math.pow(10., i * 0.25) for i in range(-16,0)] + [1.]
      frrs_dev = [bob.measure.roc_for_far(scores[0], scores[1], fars) for scores in scores_dev]
      if args.eval_files:
        frrs_eval = [bob.measure.roc_for_far(scores[0], scores[1], fars) for scores in scores_eval]

      utils.info("Plotting ROC curves to file '%s'" % args.roc)
      # create a multi-page PDF for the ROC curve
      pdf = PdfPages(args.roc)
      # create a separate figure for dev and eval
      pdf.savefig(_plot_roc(frrs_dev, colors, args.legends if args.legends else args.dev_files, "ROC curve for development set"))
      del frrs_dev
      if args.eval_files:
        pdf.savefig(_plot_roc(frrs_eval, colors, args.legends if args.legends else args.eval_files, "ROC curve for evaluation set"))
        del frrs_eval
      pdf.close()


    if args.det:
      utils.info("Computing DET curves on the development " + ("and on the evaluation set" if args.eval_files else "set"))
      dets_dev = [bob.measure.det(scores[0], scores[1], 1000) for scores in scores_dev]
      if args.eval_files:
        dets_eval = [bob.measure.det(scores[0], scores[1], 1000) for scores in scores_eval]

      utils.info("Plotting DET curves to file '%s'" % args.det)
      # create a multi-page PDF for the DET plots
      pdf = PdfPages(args.det)
      # create a separate figure for dev and eval
      pdf.savefig(_plot_det(dets_dev, colors, args.legends if args.legends else args.dev_files, "DET plot for development set"))
      del dets_dev
      if args.eval_files:
        pdf.savefig(_plot_det(dets_eval, colors, args.legends if args.legends else args.eval_files, "DET plot for evaluation set"))
        del dets_eval
      pdf.close()


  if args.cmc:
    utils.info("Computing CMC curves on the development " + ("and on the evaluation set" if args.eval_files else "set"))
    cmc_parser = {'4column' : bob.measure.load.cmc_four_column, '5column' : bob.measure.load.cmc_five_column}[args.parser]
    cmcs_dev = [cmc_parser(os.path.join(args.directory, f)) for f in args.dev_files]
    if args.eval_files:
      cmcs_eval = [cmc_parser(os.path.join(args.directory, f)) for f in args.eval_files]

    utils.info("Plotting CMC curves to file '%s'" % args.cmc)
    # create a multi-page PDF for the CMC curves
    pdf = PdfPages(args.cmc)
    # create a separate figure for dev and eval
    pdf.savefig(_plot_cmc(cmcs_dev, colors, args.legends if args.legends else args.dev_files, "CMC curve for development set"))
    if args.eval_files:
      pdf.savefig(_plot_cmc(cmcs_eval, colors, args.legends if args.legends else args.eval_files, "CMC curve for evaluation set"))
    pdf.close()
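
The minDCF figures printed above come from the weighted error cost * FAR + (1 - cost) * FRR, minimized over all decision thresholds on the respective score set. A brute-force sketch of that minimization (illustrative only; bob.measure.min_weighted_error_rate_threshold is the real implementation):

def min_weighted_error_sketch(negatives, positives, cost):
    """Minimum of cost*FAR + (1-cost)*FRR over candidate thresholds taken from the scores."""
    best = None
    for threshold in sorted(set(negatives) | set(positives)):
        far = sum(1. for s in negatives if s >= threshold) / len(negatives)
        frr = sum(1. for s in positives if s < threshold) / len(positives)
        dcf = cost * far + (1. - cost) * frr
        if best is None or dcf < best:
            best = dcf
    return best
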
Exemplo n.º 37
0
    def gmm_mstep(self, counts, force=False):
        """Performs a single M-step of the GMM training (non-parallel)"""
        old_machine_file = self.m_configuration.gmm_intermediate_file % self.m_args.iteration
        new_machine_file = self.m_configuration.gmm_intermediate_file % (
            self.m_args.iteration + 1)

        if self.m_tool_chain.__check_file__(new_machine_file, force, 1000):
            utils.info(
                "UBM training: Skipping GMM M-Step since the file '%s' already exists"
                % new_machine_file)
        else:
            # get the files from e-step
            training_list = self.m_file_selector.training_feature_list()

            # check whether a single stats file already covers all training files
            if os.path.exists(self.m_configuration.gmm_stats_file %
                              (self.m_args.iteration, 0, len(training_list))):
                stats_file = self.m_configuration.gmm_stats_file % (
                    self.m_args.iteration, 0, len(training_list))
                # load stats file
                gmm_stats = bob.machine.GMMStats(bob.io.HDF5File(stats_file))
            else:
                # load several files
                job_ids = range(
                    self.__generate_job_array__(training_list, counts)[1])
                job_indices = [(counts * job_id,
                                min(counts * (job_id + 1), len(training_list)))
                               for job_id in job_ids]
                stats_files = [
                    self.m_configuration.gmm_stats_file %
                    (self.m_args.iteration, indices[0], indices[1])
                    for indices in job_indices
                ]

                # read all stats files
                gmm_stats = bob.machine.GMMStats(
                    bob.io.HDF5File(stats_files[0]))
                for stats_file in stats_files[1:]:
                    gmm_stats += bob.machine.GMMStats(
                        bob.io.HDF5File(stats_file))

            # read one feature file (the trainer API needs data, but its content is not used in the M-step)
            data = numpy.array(bob.io.load(str(training_list[0])))

            # load the old gmm machine
            gmm_machine = bob.machine.GMMMachine(
                bob.io.HDF5File(old_machine_file))
            # initialize the trainer
            gmm_trainer = bob.trainer.ML_GMMTrainer(
                self.m_tool.m_update_means, self.m_tool.m_update_variances,
                self.m_tool.m_update_weights)
            gmm_trainer.responsibilities_threshold = self.m_tool.m_responsibility_threshold
            gmm_trainer.initialize(gmm_machine, data)
            gmm_trainer.gmm_statistics = gmm_stats

            # Calls M-step
            gmm_trainer.m_step(gmm_machine, data)

            # Saves the updated GMM machine to file
            utils.ensure_dir(os.path.dirname(new_machine_file))
            gmm_machine.save(bob.io.HDF5File(new_machine_file, 'w'))
            import shutil
            shutil.copy(new_machine_file, self.m_configuration.projector_file)

        if self.m_args.clean_intermediate and self.m_args.iteration > 0:
            old_file = self.m_configuration.gmm_intermediate_file % (
                self.m_args.iteration - 1)
            utils.info("Removing old intermediate directory '%s'" %
                       os.path.dirname(old_file))
            shutil.rmtree(os.path.dirname(old_file))
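
For reference, the ML M-step applied above updates each Gaussian from the accumulated GMMStats: with per-component summed responsibilities n (zeroth order) and responsibility-weighted data sums sum_px (first order), the new weights are n / sum(n) and the new means are sum_px / n. A NumPy sketch of just that part (variance updates and flooring, handled by bob.trainer.ML_GMMTrainer, are omitted):

import numpy

def ml_gmm_means_weights_sketch(n, sum_px):
    """Illustrative means/weights update of an ML GMM M-step.

    n:      (n_gaussians,) summed responsibilities per component
    sum_px: (n_gaussians, dim) responsibility-weighted sums of the data
    """
    weights = n / n.sum()
    means = sum_px / numpy.maximum(n, 1e-10)[:, None]  # guard against empty components
    return weights, means
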