Пример #1
0
 def _find_group_unions(self, G, evaluate_function, max_parts_per_group=5,
                        max_graph_size=16, criterion="min"):
    """Collect the union images of all multi-glyph groups found in *G*.

    Every subgraph of *G* that is no larger than *max_graph_size* is
    handed to ``G.optimize_partitions``.  For each resulting subgroup
    with more than one member, the members are merged with
    ``image_utilities.union_images``, the union is classified with
    ``guess_glyph_automatic``, and each member is heuristically marked
    as ``_group._part.<name>``, where ``<name>`` is read from the first
    entry of the union's classification.

    Returns the list of union images that were created.  The progress
    indicator is killed on every exit path, including exceptions."""
    import image_utilities
    progress = util.ProgressFactory("Grouping glyphs...", G.nsubgraphs)
    unions = []
    try:
       for subgraph_root in G.get_subgraph_roots():
          # Oversized subgraphs are skipped entirely; note that no
          # progress step is recorded for them.
          if G.size_of_subgraph(subgraph_root) > max_graph_size:
             continue
          partition = G.optimize_partitions(
             subgraph_root, evaluate_function, max_parts_per_group,
             max_graph_size, criterion)
          if partition is not None:
             for members in partition:
                # A subgroup of one glyph needs no union.
                if len(members) <= 1:
                   continue
                merged = image_utilities.union_images(members)
                unions.append(merged)
                id_name, _confidence = self.guess_glyph_automatic(merged)
                merged.classify_heuristic(id_name)
                part_id = "_group._part." + id_name[0][1]
                for member in members:
                   member.classify_heuristic(part_id)
          progress.step()
    finally:
       progress.kill()
    return unions
Пример #2
0
 def _find_group_unions(self, G, evaluate_function, max_parts_per_group=5,
                        max_graph_size=16, criterion="min"):
    """Collect the union images of all multi-glyph groups found in *G*.

    Every subgraph of *G* that is no larger than *max_graph_size* is
    handed to ``G.optimize_partitions``.  For each resulting subgroup
    with more than one member, the members are merged with
    ``image_utilities.union_images``, the union is classified with
    ``guess_glyph_automatic``, and each member is heuristically marked
    as ``_group._part.<name>``, where ``<name>`` is read from the first
    entry of the union's classification.

    Returns the list of union images that were created.  The progress
    indicator is killed on every exit path, including exceptions."""
    import image_utilities
    progress = util.ProgressFactory("Grouping glyphs...", G.nsubgraphs)
    unions = []
    try:
       for subgraph_root in G.get_subgraph_roots():
          # Oversized subgraphs are skipped entirely; note that no
          # progress step is recorded for them.
          if G.size_of_subgraph(subgraph_root) > max_graph_size:
             continue
          partition = G.optimize_partitions(
             subgraph_root, evaluate_function, max_parts_per_group,
             max_graph_size, criterion)
          if partition is not None:
             for members in partition:
                # A subgroup of one glyph needs no union.
                if len(members) <= 1:
                   continue
                merged = image_utilities.union_images(members)
                unions.append(merged)
                id_name, _confidence = self.guess_glyph_automatic(merged)
                merged.classify_heuristic(id_name)
                part_id = "_group._part." + id_name[0][1]
                for member in members:
                   member.classify_heuristic(part_id)
          progress.step()
    finally:
       progress.kill()
    return unions
Пример #3
0
    def classify_list_manual(self, glyphs, id):
        """**classify_list_manual** (ImageList *glyphs*, String *id*)

Sets the classification of the given *glyphs* to the given *id* and
then adds the glyphs to the training data.  Call this function when the
end user definitively knows the identity of the glyphs.

If *id* begins with the special prefix ``_group``, all of the glyphs
in *glyphs* are combined and the result is added to the training
data.  This is useful for characters that always appear with multiple
connnected components, such as the lower-case *i*.

*glyphs*
	The glyphs to classify.

*id*
	The class name.

.. note::
   Here *id* is a simple string, not of the `id_name`_ format, since
   the confidence of a manual classification is always 1.0."""
        if id.startswith('_group'):
            if len(glyphs) > 1:
                import image_utilities
                parts = id.split('.')
                sub = '.'.join(parts[1:])
                union = image_utilities.union_images(glyphs)
                for glyph in glyphs:
                    if glyph.nrows > 2 and glyph.ncols > 2:
                        glyph.classify_heuristic('_group._part.' + sub)
                        self.generate_features(glyph)
                added, removed = self.classify_glyph_manual(union, sub)
                added.append(union)
                return added, removed
            else:
                # grouping a single glyph corrupts the classifier_glyph.xml file
                raise ClassifierError(
                    "Grouping of only a single glyph is not allowed.")

        added = []
        removed = util.sets.Set()
        for glyph in glyphs:
            for child in glyph.children_images:
                removed.add(child)

        new_glyphs = []
        for glyph in glyphs:
            # Don't re-insert removed children glyphs
            if not glyph in removed:
                if not glyph in self.database:
                    self.generate_features(glyph)
                    new_glyphs.append(glyph)
                glyph.classify_manual([(1.0, id)])
                added.extend(self._do_splits(self, glyph))
        self.database.extend(new_glyphs)
        return added, list(removed)
Пример #4
0
   def classify_list_manual(self, glyphs, id):
      """**classify_list_manual** (ImageList *glyphs*, String *id*)

Sets the classification of the given *glyphs* to the given *id* and
then adds the glyphs to the training data.  Call this function when the
end user definitively knows the identity of the glyphs.

If *id* begins with the special prefix ``_group``, all of the glyphs
in *glyphs* are combined and the result is added to the training
data.  This is useful for characters that always appear with multiple
connnected components, such as the lower-case *i*.

*glyphs*
	The glyphs to classify.

*id*
	The class name.

.. note::
   Here *id* is a simple string, not of the `id_name`_ format, since
   the confidence of a manual classification is always 1.0."""
      if id.startswith('_group'):
         if len(glyphs) > 1:
            import image_utilities
            parts = id.split('.')
            sub = '.'.join(parts[1:])
            union = image_utilities.union_images(glyphs)
            for glyph in glyphs:
               if glyph.nrows > 2 and glyph.ncols > 2:
                  glyph.classify_heuristic('_group._part.' + sub)
                  self.generate_features(glyph)
            added, removed = self.classify_glyph_manual(union, sub)
            added.append(union)
            return added, removed
         else:
            # grouping a single glyph corrupts the classifier_glyph.xml file
            raise ClassifierError("Grouping of only a single glyph is not allowed.")

      added = []
      removed = util.sets.Set()
      for glyph in glyphs:
         for child in glyph.children_images:
            removed.add(child)

      new_glyphs = []
      for glyph in glyphs:
         # Don't re-insert removed children glyphs
         if not glyph in removed:
            if not glyph in self.database:
               self.generate_features(glyph)
               new_glyphs.append(glyph)
            glyph.classify_manual([(1.0, id)])
            added.extend(self._do_splits(self, glyph))
      self.database.extend(new_glyphs)
      return added, list(removed)
Пример #5
0
 def _evaluate_subgroup(self, subgroup):
    """Score a candidate *subgroup* of glyphs for the grouping algorithm.

    With several glyphs, they are merged into a union image that is
    classified with ``guess_glyph_automatic``; the score is the first
    value of the top classification entry, or 0.0 when that entry's
    class name starts with ``_split`` or ``skip``.

    With a single glyph, the score is read from the first entry of the
    glyph's existing ``id_name``, or is 0.0 when that entry's class name
    starts with ``_group._part``.

    Raises ``ValueError`` when *subgroup* is empty."""
    import image_utilities
    count = len(subgroup)
    if count == 0:
       raise ValueError("Something is wrong here...  Either you don't have classifier data or there is an internal error in the grouping algorithm.")
    if count == 1:
       top = subgroup[0].id_name[0]
       # A glyph already marked as part of a group scores zero on its own.
       if top[1].startswith('_group._part'):
          return 0.0
       return top[0]
    merged = image_utilities.union_images(subgroup)
    id_name, _confidence = self.guess_glyph_automatic(merged)
    top = id_name[0]
    name = top[1]
    if not (name.startswith("_split") or name.startswith("skip")):
       return top[0]
    return 0.0
Пример #6
0
 def _evaluate_subgroup(self, subgroup):
    """Score a candidate *subgroup* of glyphs for the grouping algorithm.

    With several glyphs, they are merged into a union image that is
    classified with ``guess_glyph_automatic``; the score is the first
    value of the top classification entry, or 0.0 when that entry's
    class name starts with ``_split`` or ``skip``.

    With a single glyph, the score is read from the first entry of the
    glyph's existing ``id_name``, or is 0.0 when that entry's class name
    starts with ``_group._part``.

    Raises ``ValueError`` when *subgroup* is empty."""
    import image_utilities
    count = len(subgroup)
    if count == 0:
       raise ValueError("Something is wrong here...  Either you don't have classifier data or there is an internal error in the grouping algorithm.")
    if count == 1:
       top = subgroup[0].id_name[0]
       # A glyph already marked as part of a group scores zero on its own.
       if top[1].startswith('_group._part'):
          return 0.0
       return top[0]
    merged = image_utilities.union_images(subgroup)
    id_name, _confidence = self.guess_glyph_automatic(merged)
    top = id_name[0]
    name = top[1]
    if not (name.startswith("_split") or name.startswith("skip")):
       return top[0]
    return 0.0