def testIsSmall(self):
    """Check is_small() of the writing read from each data/small fixture.

    Every fixture file is read into a fresh Character and the value of
    ``get_writing().is_small()`` is compared with the expected flag.
    """
    expectations = (
        ("small.xml", True),
        ("small2.xml", True),
        ("small3.xml", True),
        ("small4.xml", True),
        ("small5.xml", True),
        ("non-small.xml", False),
    )
    # The fixture directory is the same for every file: build it once.
    fixture_dir = os.path.join(self.currdir, "data", "small")
    for filename, expected in expectations:
        char = Character()
        char.read(os.path.join(fixture_dir, filename))
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(char.get_writing().is_small(), expected)
def _getCharacter(self):
    """Return a new Character labelled "A" holding self._getWriting()."""
    strokes = self._getWriting()
    character = Character()
    character.set_writing(strokes)
    character.set_utf8("A")
    return character
def testToSexp(self):
    """Compare to_sexp() of data/character.xml with data/character.sexp.

    The expected text is the content of the .sexp fixture with leading
    and trailing whitespace stripped.
    """
    data_dir = os.path.join(self.currdir, "data")

    char = Character()
    char.read(os.path.join(data_dir, "character.xml"))

    # The with-block closes the fixture even if read() raises.
    with open(os.path.join(data_dir, "character.sexp")) as sexp_file:
        expected = sexp_file.read().strip()

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(char.to_sexp(), expected)
def testReadXMLBZ2String(self):
    """Read a character from an in-memory bz2-compressed XML string.

    The content of data/character.xml.bz2 is loaded as-is and handed to
    Character.read_string(..., bz2=True); the result is then checked by
    self._testReadXML().
    """
    path = os.path.join(self.currdir, "data", "character.xml.bz2")
    # Compressed data is binary: open in "rb" so that no newline
    # translation (or text decoding) can corrupt it. The with-block
    # closes the file even if read() raises.
    with open(path, "rb") as bz2_file:
        compressed = bz2_file.read()

    char = Character()
    char.read_string(compressed, bz2=True)
    self._testReadXML(char)
def testWriteXMLFile(self):
    """Write a character to a file-like object and read it back.

    The character from self._getCharacter() is written to an in-memory
    StringIO buffer; a second Character parsed from the buffer content
    must compare equal to the first one.
    """
    char = self._getCharacter()

    # Named "buf" rather than "io" so the stdlib module name is not shadowed.
    buf = StringIO.StringIO()
    char.write(buf)

    new_char = Character()
    new_char.read_string(buf.getvalue())

    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(char, new_char)
def testReadXMLString(self):
    """Read a character from an in-memory XML string.

    The content of data/character.xml is loaded and handed to
    Character.read_string(); the result is then checked by
    self._testReadXML().
    """
    path = os.path.join(self.currdir, "data", "character.xml")
    # The with-block closes the file even if read() raises.
    with open(path) as xml_file:
        buf = xml_file.read()

    char = Character()
    char.read_string(buf)
    self._testReadXML(char)
def set_writings(self, writings):
    """
    writings: a list of tegaki.Writing objects.

    Clears the model, wraps every writing in a new Character labelled
    "?" and passes the resulting list to set_characters().
    """
    def as_placeholder(writing):
        # One Character per writing, with "?" as its utf8 value.
        wrapped = Character()
        wrapped.set_writing(writing)
        wrapped.set_utf8("?")
        return wrapped

    self._model.clear()
    self.set_characters([as_placeholder(writing) for writing in writings])
def _handle_START_BOX(self, args):
    """Store the current character, advance the frame position, start anew.

    A truthy self._char is appended to self._characters. The column is
    then advanced; past the last column (FRAME_COUNT_COL - 1) it resets
    to 0 and the row advances, itself resetting to 0 past the last row
    (FRAME_COUNT_ROW - 1). Finally self._char becomes a new Character.
    ``args`` is not used.
    """
    finished = self._char
    if finished:
        self._characters.append(finished)

    if self._col != self.FRAME_COUNT_COL - 1:
        # Still inside the current row: just move one column on.
        self._col += 1
    else:
        # End of row: back to the first column, next row (or first row).
        self._col = 0
        if self._row != self.FRAME_COUNT_ROW - 1:
            self._row += 1
        else:
            self._row = 0

    self._char = Character()
def _end_element(self, name): if name == "kanji": char = Character() char.set_utf8(self._utf8) char.set_writing(self._writing) self._charcol.add_set(self._utf8) self._charcol.append_character(self._utf8, char) for s in ["_tag", "_stroke"]: if s in self.__dict__: del self.__dict__[s] if name == "path": self._writing.append_stroke(self._stroke) self._stroke = None self._tag = None
def _start_element(self, name, attrs): self._tag = name if self._first_tag: self._first_tag = False if self._tag != "character-collection": raise ValueError, \ "The very first tag should be <character-collection>" if self._tag == "set": if not attrs.has_key("name"): raise ValueError, "<set> should have a name attribute" self._curr_set_name = attrs["name"].encode("UTF-8") self.add_set(self._curr_set_name) if self._tag == "character": self._curr_char = Character() self._curr_writing = self._curr_char.get_writing() self._curr_width = None self._curr_height = None self._curr_utf8 = None if self._tag == "stroke": self._curr_stroke = Stroke() elif self._tag == "point": point = Point() for key in ("x", "y", "pressure", "xtilt", "ytilt", "timestamp"): if attrs.has_key(key): value = attrs[key].encode("UTF-8") if key in ("pressure", "xtilt", "ytilt"): value = float(value) else: value = int(float(value)) else: value = None setattr(point, key, value) self._curr_stroke.append_point(point)
def _testReadXML(self, charcol):
    """Check a character collection against the data/collection fixtures.

    charcol: the collection under test. Its set list, the characters of
    each set and the list of all characters are compared with Character
    objects read from the individual fixture files.
    """
    # assertEqual replaces the deprecated assertEquals alias throughout.
    self.assertEqual(charcol.get_set_list(), ["一", "三", "二", "四"])

    fixture_dir = os.path.join(self.currdir, "data", "collection")
    c = {}
    for k in ["19968_1", "19968_2", "19968_3",
              "19977_1", "19977_2", "20108_1"]:
        c[k] = Character()
        c[k].read(os.path.join(fixture_dir, k + ".xml"))

    self.assertEqual(charcol.get_characters("一"),
                     [c["19968_1"], c["19968_2"], c["19968_3"]])
    self.assertEqual(charcol.get_characters("三"),
                     [c["19977_1"], c["19977_2"]])
    self.assertEqual(charcol.get_characters("二"),
                     [c["20108_1"]])
    self.assertEqual(charcol.get_characters("四"), [])

    self.assertEqual(charcol.get_all_characters(),
                     [c["19968_1"], c["19968_2"], c["19968_3"],
                      c["19977_1"], c["19977_2"], c["20108_1"]])
def from_character_directory(directory,
                             extensions=("xml", "bz2", "gz"),
                             recursive=True,
                             check_duplicate=False):
    """
    Creates a character collection from a directory containing
    individual character files.

    directory: path of the directory to scan.
    extensions: file name extensions (without the leading dot) of the
                files to load; they are joined into a regular expression.
    recursive: if True, sub-directories are scanned as well and their
               collections are added to the result.
    check_duplicate: if True, a character equal to one already present
                     in its set is not appended again.

    Files ending in ".gz" / ".bz2" are read with gzip=True / bz2=True.
    Files for which reading raises ValueError are skipped. Characters
    without a utf8 value are stored in the set named "Unknown".
    """
    # Raw string: "\." in a plain literal is an invalid escape sequence.
    regexp = re.compile(r"\.(%s)$" % "|".join(extensions))
    charcol = CharacterCollection()

    for name in os.listdir(directory):
        full_path = os.path.join(directory, name)

        if os.path.isdir(full_path) and recursive:
            # Forward every option: previously check_duplicate was
            # silently reset to its default in sub-directories.
            charcol += CharacterCollection.from_character_directory(
                full_path, extensions, recursive, check_duplicate)

        elif regexp.search(full_path):
            char = Character()

            # Named is_* so the gzip / bz2 module names are not shadowed.
            is_gzip = full_path.endswith(".gz")
            is_bz2 = full_path.endswith(".bz2")

            try:
                char.read(full_path, gzip=is_gzip, bz2=is_bz2)
            except ValueError:
                continue  # ignore malformed XML files

            utf8 = char.get_utf8()
            if utf8 is None:
                utf8 = "Unknown"

            charcol.add_set(utf8)
            if not check_duplicate or \
                    char not in charcol.get_characters(utf8):
                charcol.append_character(utf8, char)

    return charcol
def _convert_character(data):
    """Convert a BLOB into a Character object.

    data: base64 text; once decoded it is passed to
    Character.read_string() with gzip=True.
    """
    character = Character()
    payload = base64.b64decode(data)
    character.read_string(payload, gzip=True)
    return character
def _handle_START_BOX(self, args):
    """Store the current character (if truthy) and start a new one.

    ``args`` is not used.
    """
    finished = self._char
    if finished:
        self._characters.append(finished)

    self._char = Character()
results = self._recognize(writing, n) if is_small: return results.to_small_kana() else: return results if __name__ == "__main__": import sys from tegaki.character import Character recognizer = sys.argv[1] # name of recognizer model = sys.argv[2] # name of model file char = Character() char.read(sys.argv[3]) # path of .xml file writing = char.get_writing() recognizers = Recognizer.get_available_recognizers() print "Available recognizers", recognizers if not recognizer in recognizers: raise Exception, "Not an available recognizer" recognizer_klass = recognizers[recognizer] recognizer = recognizer_klass() models = recognizer_klass.get_available_models() print "Available models", models
def testReadXMLGzipFile(self):
    """Read data/character.xml.gzip with gzip=True and verify the result."""
    gzip_path = os.path.join(self.currdir, "data", "character.xml.gzip")
    loaded = Character()
    loaded.read(gzip_path, gzip=True)
    self._testReadXML(loaded)
def testReadXMLFile(self):
    """Read data/character.xml from its path and verify the result."""
    xml_path = os.path.join(self.currdir, "data", "character.xml")
    loaded = Character()
    loaded.read(xml_path)
    self._testReadXML(loaded)
def testCharacterEqualityNone(self):
    """A new Character must be != None and must not be == None."""
    blank = Character()
    # The != and == operators themselves are under test here, so the
    # comparisons must not be rewritten as identity checks.
    self.assertTrue(blank != None)
    self.assertFalse(blank == None)
def testReadXMLBZ2File(self):
    """Read data/character.xml.bz2 with bz2=True and verify the result."""
    bz2_path = os.path.join(self.currdir, "data", "character.xml.bz2")
    loaded = Character()
    loaded.read(bz2_path, bz2=True)
    self._testReadXML(loaded)