Пример #1
0
 def testAddSame(self):
     """Concatenate two collections read from the same file.

     Both operands are loaded from data/collection/test.charcol and
     combined with ``check_duplicate=True``; the test then pins the set
     list of the result and the number of characters held in each set.
     """
     path = os.path.join(self.currdir, "data", "collection", "test.charcol")
     charcol = CharacterCollection()
     charcol.read(path)
     charcol2 = CharacterCollection()
     charcol2.read(path)
     charcol3 = charcol.concatenate(charcol2, check_duplicate=True)
     # assertEqual is the supported name; assertEquals is a deprecated alias.
     self.assertEqual(charcol3.get_set_list(), ["一", "三", "二", "四"])
     self.assertEqual(len(charcol3.get_characters("一")), 3)
     self.assertEqual(len(charcol3.get_characters("三")), 2)
     self.assertEqual(len(charcol3.get_characters("二")), 1)
     self.assertEqual(len(charcol3.get_characters("四")), 0)
Пример #2
0
 def setUp(self):
     """Load the fixtures shared by the tests.

     Sets ``self.currdir`` to the directory containing this file,
     ``self.cc`` to a CharacterCollection read from
     data/collection/test.charcol and ``self.c`` to a Character read
     from data/character.xml.
     """
     self.currdir = os.path.dirname(os.path.abspath(__file__))
     data_dir = os.path.join(self.currdir, "data")

     self.cc = CharacterCollection()
     self.cc.read(os.path.join(data_dir, "collection", "test.charcol"))

     self.c = Character()
     self.c.read(os.path.join(data_dir, "character.xml"))
Пример #3
0
def get_aggregated_charcol(tuples, dbpath=None):
    """
    Create a character collection out of other character collections,
    character directories, tomoe dictionaries or kuchibue databases.

    tuples: a list of tuples (TYPE, path list)
    dbpath: optional path of the database backing the result; it is only
            used when it ends with ".chardb", any other value falls
            through to the in-memory branch

    Returns the single loaded collection when exactly one path is given
    and dbpath is None; otherwise a CharacterCollection into which every
    loaded collection has been merged.

    If dbpath already exists the user is prompted on stdin; answering
    anything other than "y" to the first question terminates the process
    (SystemExit).
    """
    # Function-scope import: sys.exit() is always available, whereas the
    # bare exit() helper is only injected by the site module.
    import sys

    # number of files for each character collection type
    n_files = [len(t[1]) for t in tuples]

    # we don't need to merge character collections if only one is provided
    # this can save a lot of time for large collections
    if sum(n_files) == 1 and dbpath is None:
        idx = n_files.index(1)
        return _get_charcol(tuples[idx][0], tuples[idx][1][0])

    if dbpath is not None and dbpath.endswith(".chardb"):
        if os.path.exists(dbpath):
            # Single-argument print(...) behaves the same as the print
            # statement in Python 2.
            print("%s exists already." % dbpath)
            print("Continuing will modify it...")
            answer = raw_input("Continue anyway? (y/N)")
            if answer != "y":
                sys.exit()

            print("Overwrite to concatenate collections together "
                  "in a new database")
            print("Don't overwrite to append new characters or "
                  "filter (-i,-e,-m) existing database")
            answer = raw_input("Overwrite it? (y/N)")
            if answer == "y":
                # start from a fresh database file
                os.unlink(dbpath)

        charcol = CharacterCollection(dbpath)
        #charcol.WRITE_BACK = False
        #charcol.AUTO_COMMIT = True
    else:
        charcol = CharacterCollection()  # in memory db

    # load every path of every type, in the order given
    charcols = [_get_charcol(typ, path)
                for typ, paths in tuples for path in paths]

    charcol.merge(charcols)

    return charcol
Пример #4
0
 def testAdd(self):
     """Add two collections read from different files with ``+``.

     The operands come from data/collection/test.charcol and
     data/collection/test2.charcol; the test pins the set list of the
     sum and the number of characters held in each set.
     """
     path = os.path.join(self.currdir, "data", "collection", "test.charcol")
     charcol = CharacterCollection()
     charcol.read(path)
     path2 = os.path.join(self.currdir, "data", "collection",
                          "test2.charcol")
     charcol2 = CharacterCollection()
     charcol2.read(path2)
     charcol3 = charcol + charcol2
     # assertEqual is the supported name; assertEquals is a deprecated alias.
     self.assertEqual(charcol3.get_set_list(),
                      ["一", "三", "二", "四", "a", "b", "c", "d"])
     self.assertEqual(len(charcol3.get_characters("一")), 3)
     self.assertEqual(len(charcol3.get_characters("三")), 2)
     self.assertEqual(len(charcol3.get_characters("二")), 1)
     self.assertEqual(len(charcol3.get_characters("四")), 0)
     self.assertEqual(len(charcol3.get_characters("a")), 3)
     self.assertEqual(len(charcol3.get_characters("b")), 2)
     self.assertEqual(len(charcol3.get_characters("c")), 1)
     self.assertEqual(len(charcol3.get_characters("d")), 0)
Пример #5
0
def _get_charcol(charcol_type, charcol_path):
    """
    Load one character collection using the loader selected by its type.

    charcol_type: one of TYPE_DIRECTORY, TYPE_CHARCOL, TYPE_CHARCOL_DB,
                  TYPE_TOMOE or TYPE_KUCHIBUE
    charcol_path: path handed to the selected loader (a directory when
                  charcol_type is TYPE_DIRECTORY)

    Raises ValueError when charcol_type is none of the handled constants,
    instead of silently returning None.
    """
    if charcol_type == TYPE_DIRECTORY:
        # charcol_path is actually a directory here
        return CharacterCollection.from_character_directory(charcol_path)

    if charcol_type in (TYPE_CHARCOL, TYPE_CHARCOL_DB):
        return CharacterCollection(charcol_path)

    if charcol_type == TYPE_TOMOE:
        return tomoe_dict_to_character_collection(charcol_path)

    if charcol_type == TYPE_KUCHIBUE:
        return kuchibue_to_character_collection(charcol_path)

    # Fail loudly: a None result would only surface later, far from the cause.
    raise ValueError("Unknown character collection type: %r"
                     % (charcol_type,))
Пример #6
0
    def get_character_collection(self):
        """
        Build a CharacterCollection from self._characters and self._labels.

        Each character is tagged with its label via set_utf8(), characters
        sharing a label are grouped into one set named after that label,
        and the sets are then added to a new collection, which is returned.
        """
        charcol = CharacterCollection()
        assert len(self._labels) == len(self._characters)

        # group characters with the same label into sets
        sets = {}
        for i, character in enumerate(self._characters):
            utf8 = self._labels[i]
            character.set_utf8(utf8)
            # append in place rather than rebuilding the list each time
            sets.setdefault(utf8, []).append(character)

        charcol.add_sets(sets.keys())

        for set_name, characters in sets.items():
            charcol.append_characters(set_name, characters)

        return charcol
Пример #7
0
 def __init__(self):
     """Store a CharacterCollection built with no arguments in self._charcol."""
     self._charcol = CharacterCollection()
Пример #8
0
 def testToXML(self):
     """Round-trip the fixture collection through to_xml()/read_string().

     The re-read collection must report the same set list and the same
     characters as the original ``self.cc``.
     """
     charcol2 = CharacterCollection()
     charcol2.read_string(self.cc.to_xml())
     # assertEqual is the supported name; assertEquals is a deprecated alias.
     self.assertEqual(self.cc.get_set_list(), charcol2.get_set_list())
     self.assertEqual(self.cc.get_all_characters(),
                      charcol2.get_all_characters())
Пример #9
0
 def testWriteBz2String(self):
     """Round-trip the fixture collection through a bz2 string.

     ``self.cc`` is written with ``write_string(bz2=True)`` and read back
     with ``read_string(..., bz2=True)``; the re-read collection must
     report the same set list and the same characters.
     """
     charcol2 = CharacterCollection()
     charcol2.read_string(self.cc.write_string(bz2=True), bz2=True)
     # assertEqual is the supported name; assertEquals is a deprecated alias.
     self.assertEqual(self.cc.get_set_list(), charcol2.get_set_list())
     self.assertEqual(self.cc.get_all_characters(),
                      charcol2.get_all_characters())