Пример #1
0
    def _get_available_models(cls, recognizer):
        """
        Collect the models available for a given recognizer.

        Each directory of the "models" search path is checked for a
        sub-directory named after the recognizer, and every .meta file found
        there is read. A model is skipped when its meta file lacks "name" or
        "shortname", or when it declares a "path" that does not exist.
        When "path" is missing and a .model file sits next to the .meta
        file, that file is used as the path.

        @type recognizer: str
        @param recognizer: name of the sub-directory to look into

        @rtype: SortedDict
        @return: meta dictionaries keyed by model name; if several meta
                 files declare the same name, the last one read wins
        """
        available_models = SortedDict()

        for directory in cls._get_search_path("models"):
            directory = os.path.join(directory, recognizer)

            if not os.path.exists(directory):
                continue

            for meta_file in glob.glob(os.path.join(directory, "*.meta")):
                meta = Recognizer.read_meta_file(meta_file)

                if "name" not in meta or "shortname" not in meta:
                    continue

                if "path" in meta:
                    if not os.path.exists(meta["path"]):
                        # skip model if specified path is incorrect
                        continue
                else:
                    # Swap the extension only: str.replace() would also
                    # rewrite a ".meta" occurring earlier in the path
                    # (e.g. in a directory name).
                    model_file = os.path.splitext(meta_file)[0] + ".model"

                    if os.path.exists(model_file):
                        # if path option is missing, assume the .model file
                        # is in the same directory
                        meta["path"] = model_file

                available_models[meta["name"]] = meta

        return available_models
Пример #2
0
    def read_meta_file(cls, meta_file):
        """
        Read a .meta file.

        The file is parsed line by line as "key = value" pairs. Keys and
        values are stripped of surrounding whitespace. Lines that contain no
        "=" are ignored. Only the first "=" separates the key from the
        value, so a value may itself contain "=".

        @type meta_file: str
        @param meta_file: path of the meta file to read

        @rtype: SortedDict
        @return: the key/value pairs found in the file, as strings
        """
        ret = SortedDict()
        f = open(meta_file)
        try:
            for line in f:
                fields = line.split("=", 1)
                if len(fields) != 2:
                    # no "=" on this line: not a key/value pair
                    continue
                key, value = fields
                ret[key.strip()] = value.strip()
        finally:
            # release the file handle even if parsing fails
            f.close()
        return ret
Пример #3
0
    def _load_available_recognizers(cls):
        """
        Scan the "engines" search path and register recognizer classes.

        Every *.py file found in an existing search directory is loaded as a
        module, except files whose path ends with "__init__.py" or
        "setup.py". A module that exposes RECOGNIZER_CLASS, itself having a
        RECOGNIZER_NAME attribute, is stored in cls.available_recognizers
        under that name. Modules lacking either attribute are ignored.
        """
        cls.available_recognizers = SortedDict()

        for directory in cls._get_search_path("engines"):
            if not os.path.exists(directory):
                continue

            for f in glob.glob(os.path.join(directory, "*.py")):
                if f.endswith("__init__.py") or f.endswith("setup.py"):
                    continue

                # Strip the extension only: str.replace() would also remove
                # a ".py" occurring in the middle of the file name.
                module_name = os.path.splitext(os.path.basename(f))[0]
                module_name += "recognizer"
                module = imp.load_source(module_name, f)

                try:
                    recognizer_class = module.RECOGNIZER_CLASS
                    name = recognizer_class.RECOGNIZER_NAME
                except AttributeError:
                    # the module does not define a recognizer: skip it
                    continue

                cls.available_recognizers[name] = recognizer_class
Пример #4
0
    def get_char_dict(self, directory, corpora):
        """
        Load the characters of the wanted corpora and group them by code.

        Entries matching directory/<corpus>/* are examined. Sub-directories
        are loaded with CharacterCollection.from_character_directory, and
        other entries with ".charcol" in their path are read as character
        collections (flagged gzip or bz2 when the path ends with ".gz" or
        ".bz2"). Entries whose corpus is not in corpora are ignored.

        directory: root directory
        corpora: corpora list to restrict to

        Returns a dictionary:
            keys are character codes (ord() of the character's unicode).
            values are lists of the character objects having that code.
        """
        charcol = CharacterCollection()
        for path in glob.glob(os.path.join(directory, "*", "*")):
            # The corpus is the parent directory of the entry. os.path is
            # used instead of splitting on "/" so that this also works on
            # platforms whose path separator is not "/".
            corpus_name = os.path.basename(os.path.dirname(path))

            # exclude data which are not in the wanted corpora
            if corpus_name not in corpora:
                continue

            if os.path.isdir(path):
                self.print_verbose("Loading dir %s" % path)
                charcol += CharacterCollection.from_character_directory(path)
            elif ".charcol" in path:
                self.print_verbose("Loading charcol %s" % path)
                # the compression is deduced from the file extension
                is_gzip = path.endswith(".gz")
                is_bz2 = path.endswith(".bz2")
                charcol2 = CharacterCollection()
                charcol2.read(path, gzip=is_gzip, bz2=is_bz2)
                charcol += charcol2

        self.print_verbose("Grouping characters together...")
        dic = SortedDict()
        for set_name in charcol.get_set_list():
            for char in charcol.get_characters(set_name):
                charcode = ord(char.get_unicode())
                if charcode not in dic:
                    dic[charcode] = []
                dic[charcode].append(char)

        return dic
Пример #5
0
 def _update_set_ids(self):
     """
     Rebuild self._SETIDS from the character_sets table.

     self._SETIDS is reset to an empty SortedDict, then filled with one
     entry per row returned by the query: the key is the row's "name"
     encoded as UTF-8 and the value is the row's "setid".
     """
     self._SETIDS = SortedDict()
     query = "SELECT * FROM character_sets ORDER BY setid"
     for record in self._efa(query):
         set_id = record['setid']
         set_name = record['name'].encode("utf8")
         self._SETIDS[set_name] = set_id