示例#1
0
 def _find_group_unions(self,
                        G,
                        evaluate_function,
                        max_parts_per_group=5,
                        max_graph_size=16,
                        criterion="min"):
     """Build union images for the best glyph grouping of each subgraph.

     Every subgraph root of *G* whose subgraph holds at most
     *max_graph_size* nodes is handed to ``G.optimize_partitions``.  For
     each returned subgroup containing more than one glyph, the glyphs are
     merged with ``image_utilities.union_images``, the union is classified
     with ``self.guess_glyph_automatic``, and every member glyph is
     heuristically labelled ``_group._part.<name>``, where ``<name>`` is the
     class name of the union's first classification entry.  Subgroups with
     a single glyph are left untouched.

     *G*
         Graph object providing ``nsubgraphs``, ``get_subgraph_roots``,
         ``size_of_subgraph`` and ``optimize_partitions``.

     *evaluate_function*, *max_parts_per_group*, *criterion*
         Passed through unchanged to ``G.optimize_partitions``.

     *max_graph_size*
         Subgraphs larger than this are skipped; the value is also passed
         to ``G.optimize_partitions``.

     Returns the list of union images that were created.
     """
     progress = util.ProgressFactory("Grouping glyphs...", G.nsubgraphs)
     try:
         found_unions = []
         for root in G.get_subgraph_roots():
             # Oversized subgraphs are skipped entirely (no progress step).
             if G.size_of_subgraph(root) > max_graph_size:
                 continue
             best_grouping = G.optimize_partitions(root, evaluate_function,
                                                   max_parts_per_group,
                                                   max_graph_size,
                                                   criterion)
             if best_grouping is not None:
                 for subgroup in best_grouping:
                     # Only real groups (two or more glyphs) yield a union.
                     # A lone glyph must not be relabelled: there is no
                     # part name for it, and reusing the name of a previous
                     # group would mark it as part of an unrelated union.
                     if len(subgroup) <= 1:
                         continue
                     union = image_utilities.union_images(subgroup)
                     found_unions.append(union)
                     classification, _confidence = self.guess_glyph_automatic(
                         union)
                     union.classify_heuristic(classification)
                     part_name = "_group._part." + classification[0][1]
                     for glyph in subgroup:
                         glyph.classify_heuristic(part_name)
             progress.step()
     finally:
         # Always dismiss the progress indicator, even on error.
         progress.kill()
     return found_unions
示例#2
0
    def classify_list_manual(self, glyphs, id):
        """**classify_list_manual** (ImageList *glyphs*, String *id*)

Sets the classification of the given *glyphs* to the given *id* and
then adds the glyphs to the training data.  Call this function when the
end user definitively knows the identity of the glyphs.

If *id* begins with the special prefix ``_group``, all of the glyphs
in *glyphs* are combined and the result is added to the training
data.  This is useful for characters that always appear with multiple
connnected components, such as the lower-case *i*.

*glyphs*
    The glyphs to classify.

*id*
    The class name.

.. note::
   Here *id* is a simple string, not of the `id_name`_ format, since
   the confidence of a manual classification is always 1.0."""
        if id.startswith('_group'):
            if len(glyphs) > 1:
                parts = id.split('.')
                sub = '.'.join(parts[1:])
                union = image_utilities.union_images(glyphs)
                for glyph in glyphs:
                    if glyph.nrows > 2 and glyph.ncols > 2:
                        glyph.classify_heuristic('_group._part.' + sub)
                        self.generate_features(glyph)
                added, removed = self.classify_glyph_manual(union, sub)
                added.append(union)
                return added, removed
            else:
                # grouping a single glyph corrupts the classifier_glyph.xml file
                raise ClassifierError(
                    "Grouping of only a single glyph is not allowed.")

        added = []
        removed = util.sets.Set()
        for glyph in glyphs:
            for child in glyph.children_images:
                removed.add(child)

        new_glyphs = []
        for glyph in glyphs:
            # Don't re-insert removed children glyphs
            if not glyph in removed:
                if not glyph in self.database:
                    self.generate_features(glyph)
                    new_glyphs.append(glyph)
                glyph.classify_manual([(1.0, id)])
                added.extend(self._do_splits(self, glyph))
        self.database.extend(new_glyphs)
        return added, list(removed)
示例#3
0
    def classify_list_manual(self, glyphs, id):
        """**classify_list_manual** (ImageList *glyphs*, String *id*)

Sets the classification of the given *glyphs* to the given *id* and
then adds the glyphs to the training data.  Call this function when the
end user definitively knows the identity of the glyphs.

If *id* begins with the special prefix ``_group``, all of the glyphs
in *glyphs* are combined and the result is added to the training
data.  This is useful for characters that always appear with multiple
connnected components, such as the lower-case *i*.

*glyphs*
    The glyphs to classify.

*id*
    The class name.

.. note::
   Here *id* is a simple string, not of the `id_name`_ format, since
   the confidence of a manual classification is always 1.0."""
        if id.startswith('_group'):
            if len(glyphs) > 1:
                parts = id.split('.')
                sub = '.'.join(parts[1:])
                union = image_utilities.union_images(glyphs)
                for glyph in glyphs:
                    if glyph.nrows > 2 and glyph.ncols > 2:
                        glyph.classify_heuristic('_group._part.' + sub)
                        self.generate_features(glyph)
                added, removed = self.classify_glyph_manual(union, sub)
                added.append(union)
                return added, removed
            else:
                # grouping a single glyph corrupts the classifier_glyph.xml file
                raise ClassifierError("Grouping of only a single glyph is not allowed.")

        added = []
        removed = util.sets.Set()
        for glyph in glyphs:
            for child in glyph.children_images:
                removed.add(child)

        new_glyphs = []
        for glyph in glyphs:
            # Don't re-insert removed children glyphs
            if not glyph in removed:
                if not glyph in self.database:
                    self.generate_features(glyph)
                    new_glyphs.append(glyph)
                glyph.classify_manual([(1.0, id)])
                added.extend(self._do_splits(self, glyph))
        self.database.extend(new_glyphs)
        return added, list(removed)
示例#4
0
 def _evaluate_subgroup(self, subgroup):
     if len(subgroup) > 1:
         union = image_utilities.union_images(subgroup)
         classification, confidence = self.guess_glyph_automatic(union)
         classification_name = classification[0][1]
         if (classification_name.startswith("_split") or
             classification_name.startswith("skip")):
             return 0.0
         else:
             return classification[0][0]
     if len(subgroup):
         classification = subgroup[0].id_name[0]
         if classification[1].startswith('_group._part'):
             return 0.0
         return classification[0]
     raise ValueError("Something is wrong here...  Either you don't have classifier data or there is an internal error in the grouping algorithm.")
示例#5
0
 def _evaluate_subgroup(self, subgroup):
     if len(subgroup) > 1:
         union = image_utilities.union_images(subgroup)
         classification, confidence = self.guess_glyph_automatic(union)
         classification_name = classification[0][1]
         if (classification_name.startswith("_split")
                 or classification_name.startswith("skip")):
             return 0.0
         else:
             return classification[0][0]
     if len(subgroup):
         classification = subgroup[0].id_name[0]
         if classification[1].startswith('_group._part'):
             return 0.0
         return classification[0]
     raise ValueError(
         "Something is wrong here...  Either you don't have classifier data or there is an internal error in the grouping algorithm."
     )
示例#6
0
 def _find_group_unions(self, G, evaluate_function, max_parts_per_group=5,
                       max_graph_size=16, criterion="min"):
     """Find the best grouping per subgraph and return the resulting unions.

     For each subgraph root of *G* whose subgraph has no more than
     *max_graph_size* nodes, ``G.optimize_partitions`` is asked for the
     best partition.  Every subgroup of that partition with two or more
     glyphs is merged via ``image_utilities.union_images``; the union is
     classified with ``self.guess_glyph_automatic`` and its member glyphs
     are heuristically labelled ``_group._part.<name>``, ``<name>`` being
     the class name of the union's first classification entry.
     Single-glyph subgroups are not modified.

     *G*
         Graph object providing ``nsubgraphs``, ``get_subgraph_roots``,
         ``size_of_subgraph`` and ``optimize_partitions``.

     *evaluate_function*, *max_parts_per_group*, *criterion*
         Forwarded as-is to ``G.optimize_partitions``.

     *max_graph_size*
         Upper bound on the subgraph size that is processed; also
         forwarded to ``G.optimize_partitions``.

     Returns the list of union images that were created.
     """
     progress = util.ProgressFactory("Grouping glyphs...", G.nsubgraphs)
     try:
         found_unions = []
         for root in G.get_subgraph_roots():
             # Oversized subgraphs are skipped entirely (no progress step).
             if G.size_of_subgraph(root) > max_graph_size:
                 continue
             best_grouping = G.optimize_partitions(
                 root, evaluate_function, max_parts_per_group,
                 max_graph_size, criterion)
             if best_grouping is not None:
                 for subgroup in best_grouping:
                     # Labelling must stay inside the multi-glyph case: a
                     # lone glyph has no part name of its own, and reusing
                     # the one left over from an earlier subgroup would tag
                     # it as a part of an unrelated union.
                     if len(subgroup) > 1:
                         union = image_utilities.union_images(subgroup)
                         found_unions.append(union)
                         classification, _confidence = (
                             self.guess_glyph_automatic(union))
                         union.classify_heuristic(classification)
                         part_name = "_group._part." + classification[0][1]
                         for glyph in subgroup:
                             glyph.classify_heuristic(part_name)
             progress.step()
     finally:
         # Always dismiss the progress indicator, even on error.
         progress.kill()
     return found_unions