def _find_group_unions(self, G, evaluate_function, max_parts_per_group=5,
                       max_graph_size=16, criterion="min"):
    """Build union images for every multi-glyph group found in the graph *G*.

    For each subgraph root of *G* whose subgraph is not larger than
    *max_graph_size*, ``G.optimize_partitions`` is asked for the best
    partition.  Every part of that partition holding more than one glyph is
    merged into a union image, the union is classified automatically, and
    the member glyphs are relabelled as ``_group._part.<name>``.

    *G*
      The graph object; it must provide ``nsubgraphs``,
      ``get_subgraph_roots``, ``size_of_subgraph`` and
      ``optimize_partitions``.

    *evaluate_function*, *max_parts_per_group*, *max_graph_size*, *criterion*
      Forwarded unchanged to ``G.optimize_partitions``.

    Returns the list of union images that were created.
    """
    unions = []
    progress = util.ProgressFactory("Grouping glyphs...", G.nsubgraphs)
    try:
        for root in G.get_subgraph_roots():
            # Oversized subgraphs are not partitioned at all.
            # NOTE(review): they also skip progress.step() below, so the
            # progress total may never be reached -- confirm this is intended.
            if G.size_of_subgraph(root) > max_graph_size:
                continue
            partition = G.optimize_partitions(
                root, evaluate_function, max_parts_per_group,
                max_graph_size, criterion)
            if partition is not None:
                for members in partition:
                    if len(members) < 2:
                        # A lone glyph is not a group; leave it untouched.
                        continue
                    merged = image_utilities.union_images(members)
                    unions.append(merged)
                    ranking, _confidence = self.guess_glyph_automatic(merged)
                    merged.classify_heuristic(ranking)
                    # Tag each member with the name of the group's top guess.
                    label = "_group._part." + ranking[0][1]
                    for member in members:
                        member.classify_heuristic(label)
            progress.step()
    finally:
        progress.kill()
    return unions
def classify_list_manual(self, glyphs, id):
    """**classify_list_manual** (ImageList *glyphs*, String *id*)

    Assigns the class *id* to every glyph in *glyphs* and adds the glyphs to
    the training data.  Use this when the end user knows for certain what the
    glyphs are.

    When *id* starts with the special prefix ``_group``, the *glyphs* are
    merged into a single union image and that union is what gets added to the
    training data.  This is useful for characters that always consist of
    several connected components, such as the lower-case *i*.

    *glyphs*
      The glyphs to classify.

    *id*
      The class name.

    Returns a pair ``(added, removed)``: the glyphs created by the
    classification and the glyphs that it made obsolete.

    Raises ``ClassifierError`` when a ``_group`` *id* is given together with
    fewer than two glyphs.

    .. note::
       Here *id* is a simple string, not of the `id_name`_ format, since
       the confidence of a manual classification is always 1.0."""
    if id.startswith('_group'):
        if len(glyphs) <= 1:
            # grouping a single glyph corrupts the classifier_glyph.xml file
            raise ClassifierError("Grouping of only a single glyph is not allowed.")
        # Everything after the first dotted component names the group itself.
        group_name = '.'.join(id.split('.')[1:])
        merged = image_utilities.union_images(glyphs)
        for part in glyphs:
            # Only parts larger than 2x2 are labelled and given features.
            if part.nrows > 2 and part.ncols > 2:
                part.classify_heuristic('_group._part.' + group_name)
                self.generate_features(part)
        added, removed = self.classify_glyph_manual(merged, group_name)
        added.append(merged)
        return added, removed

    # Children of the classified glyphs are reported as removed.
    removed = util.sets.Set()
    for parent in glyphs:
        for child in parent.children_images:
            removed.add(child)

    added = []
    fresh = []
    for candidate in glyphs:
        # Don't re-insert removed children glyphs
        if candidate in removed:
            continue
        if candidate not in self.database:
            self.generate_features(candidate)
            fresh.append(candidate)
        candidate.classify_manual([(1.0, id)])
        # NOTE(review): self is passed explicitly; presumably _do_splits is
        # stored as a plain function rather than a bound method -- confirm.
        added.extend(self._do_splits(self, candidate))
    self.database.extend(fresh)
    return added, list(removed)
def classify_list_manual(self, glyphs, id):
    """**classify_list_manual** (ImageList *glyphs*, String *id*)

    Sets the classification of all *glyphs* to *id* and stores them in the
    training data.  Call this when the end user definitively knows the
    identity of the glyphs.

    If *id* begins with the special prefix ``_group``, the *glyphs* are
    combined into one union image, and the union is added to the training
    data.  This suits characters that always appear as multiple connected
    components, such as the lower-case *i*.

    *glyphs*
      The glyphs to classify.

    *id*
      The class name.

    The result is the pair ``(added, removed)`` of newly created glyphs and
    glyphs made obsolete by this classification.

    ``ClassifierError`` is raised if *id* is a ``_group`` name and *glyphs*
    contains fewer than two glyphs.

    .. note::
       Here *id* is a simple string, not of the `id_name`_ format, since
       the confidence of a manual classification is always 1.0."""
    is_group = id.startswith('_group')
    if is_group and len(glyphs) > 1:
        # Drop the leading dotted component to get the name of the group.
        sub_name = '.'.join(id.split('.')[1:])
        union_image = image_utilities.union_images(glyphs)
        for piece in glyphs:
            # Pieces of 2 pixels or less in either dimension are skipped.
            if piece.nrows > 2 and piece.ncols > 2:
                piece.classify_heuristic('_group._part.' + sub_name)
                self.generate_features(piece)
        added, removed = self.classify_glyph_manual(union_image, sub_name)
        added.append(union_image)
        return added, removed
    if is_group:
        # grouping a single glyph corrupts the classifier_glyph.xml file
        raise ClassifierError("Grouping of only a single glyph is not allowed.")

    # Collect the children of all given glyphs; they count as removed.
    removed = util.sets.Set()
    for owner in glyphs:
        for child_image in owner.children_images:
            removed.add(child_image)

    added = []
    to_store = []
    for item in glyphs:
        # Don't re-insert removed children glyphs
        if item not in removed:
            if item not in self.database:
                self.generate_features(item)
                to_store.append(item)
            item.classify_manual([(1.0, id)])
            # NOTE(review): the explicit extra self argument suggests
            # _do_splits is not a bound method -- verify where it is assigned.
            added.extend(self._do_splits(self, item))
    self.database.extend(to_store)
    return added, list(removed)
def _evaluate_subgroup(self, subgroup):
    """Return a score for treating *subgroup* as one group of glyphs.

    *subgroup*
      A sequence of glyphs.

    With several glyphs, their union image is classified automatically and
    the first value of the top-ranked result is returned, unless the
    top-ranked name starts with ``_split`` or ``skip``, which scores 0.0.

    With a single glyph, the first value of its first ``id_name`` entry is
    returned, unless the glyph is already labelled as a ``_group._part``,
    which scores 0.0.

    Raises ``ValueError`` for an empty *subgroup*.
    """
    count = len(subgroup)

    if count > 1:
        merged = image_utilities.union_images(subgroup)
        ranking, _confidence = self.guess_glyph_automatic(merged)
        top = ranking[0]
        # Unions recognised as split or skip candidates are never accepted.
        if top[1].startswith(("_split", "skip")):
            return 0.0
        return top[0]

    if count:
        top = subgroup[0].id_name[0]
        # A glyph that is already part of a group must not stand alone.
        if top[1].startswith('_group._part'):
            return 0.0
        return top[0]

    raise ValueError("Something is wrong here... Either you don't have classifier data or there is an internal error in the grouping algorithm.")
def _evaluate_subgroup(self, subgroup):
    """Score a candidate group of glyphs.

    *subgroup*
      A sequence of glyphs proposed as one group.

    Returns 0.0 when the candidate is rejected: either the union of several
    glyphs is classified with a name starting with ``_split`` or ``skip``,
    or a lone glyph is already labelled ``_group._part``.  Otherwise the
    first value of the best classification entry is returned; for several
    glyphs that entry comes from classifying their union, for one glyph
    from its own ``id_name``.

    Raises ``ValueError`` if *subgroup* is empty.
    """
    if not len(subgroup):
        raise ValueError(
            "Something is wrong here... Either you don't have classifier data or there is an internal error in the grouping algorithm."
        )

    if len(subgroup) == 1:
        best = subgroup[0].id_name[0]
        # Parts of an existing group are not valid on their own.
        rejected = best[1].startswith('_group._part')
    else:
        union_image = image_utilities.union_images(subgroup)
        guesses, _unused = self.guess_glyph_automatic(union_image)
        best = guesses[0]
        # Split and skip results mean the union is not a real character.
        rejected = best[1].startswith(("_split", "skip"))

    if rejected:
        return 0.0
    return best[0]
def _find_group_unions(self, G, evaluate_function, max_parts_per_group=5,
                       max_graph_size=16, criterion="min"):
    """Create and classify the union image of each multi-glyph group in *G*.

    Each subgraph of *G* that is not larger than *max_graph_size* is
    partitioned with ``G.optimize_partitions``.  Parts containing at least
    two glyphs are merged into a union image, which is classified
    automatically; the glyphs of the part are then labelled
    ``_group._part.<name>`` using the name of the union's top guess.

    *G*
      The graph object to search.

    *evaluate_function*
      Scoring callable handed to ``G.optimize_partitions``.

    *max_parts_per_group*, *criterion*
      Passed through to ``G.optimize_partitions``.

    *max_graph_size*
      Subgraphs larger than this are skipped; the value is also passed to
      ``G.optimize_partitions``.

    Returns a list with all union images created.
    """
    progress = util.ProgressFactory("Grouping glyphs...", G.nsubgraphs)
    result = []
    try:
        for subgraph_root in G.get_subgraph_roots():
            too_large = G.size_of_subgraph(subgraph_root) > max_graph_size
            if too_large:
                # NOTE(review): skipped subgraphs do not advance the
                # progress counter -- confirm that this is intentional.
                continue
            grouping = G.optimize_partitions(
                subgraph_root, evaluate_function, max_parts_per_group,
                max_graph_size, criterion)
            if grouping is not None:
                for part in grouping:
                    if len(part) > 1:
                        union_image = image_utilities.union_images(part)
                        result.append(union_image)
                        guesses, _unused = self.guess_glyph_automatic(
                            union_image)
                        union_image.classify_heuristic(guesses)
                        # All members share one label derived from the
                        # union's best guess.
                        member_label = "_group._part." + guesses[0][1]
                        for glyph in part:
                            glyph.classify_heuristic(member_label)
            progress.step()
    finally:
        progress.kill()
    return result