def _get_available_models(cls, recognizer):
    """
    Collect the models available for a recognizer.

    Every directory returned by cls._get_search_path("models") is searched
    for a sub-directory named after the recognizer, and each "*.meta" file
    found there is read with Recognizer.read_meta_file. A meta file is
    ignored when it lacks a "name" or "shortname" entry, or when it has a
    "path" entry pointing to a file that does not exist.

    When "path" is missing and a ".model" file with the same base name sits
    next to the meta file, "path" is set to that file. A model with neither
    is still listed, just without a "path" entry.

    @type recognizer: str
    @param recognizer: name of the recognizer sub-directory to look into

    @rtype: dict
    @return: a SortedDict mapping each model "name" to its meta dictionary;
             a model found later replaces an earlier one of the same name
    """
    available_models = SortedDict()

    for directory in cls._get_search_path("models"):
        directory = os.path.join(directory, recognizer)

        if not os.path.exists(directory):
            continue

        for meta_file in glob.glob(os.path.join(directory, "*.meta")):
            meta = Recognizer.read_meta_file(meta_file)

            if "name" not in meta or "shortname" not in meta:
                continue

            if "path" in meta:
                if not os.path.exists(meta["path"]):
                    # skip model if specified path is incorrect
                    continue
            else:
                # if path option is missing, assume the .model file
                # is in the same directory. Only the extension is swapped:
                # a plain str.replace would also rewrite any ".meta" found
                # in the directory part of the path.
                model_file = os.path.splitext(meta_file)[0] + ".model"
                if os.path.exists(model_file):
                    meta["path"] = model_file

            available_models[meta["name"]] = meta

    return available_models
def read_meta_file(cls, meta_file):
    """
    Read a .meta file.

    Each line of the form "key = value" becomes one entry of the result;
    surrounding whitespace is stripped from both the key and the value.
    Lines that do not contain an "=" sign are ignored.

    @type meta_file: str
    @param meta_file: path of the meta file to read

    @rtype: dict
    @return: a SortedDict mapping each key to its value (both strings);
             when a key appears several times, the last value is kept
    """
    ret = SortedDict()

    # The with-block closes the file even if reading fails part-way.
    with open(meta_file) as f:
        for line in f:
            # Split on the first "=" only, so that a value which itself
            # contains "=" is kept whole instead of the line being dropped.
            key, sep, value = line.partition("=")
            if not sep:
                continue
            ret[key.strip()] = value.strip()

    return ret
def _load_available_recognizers(cls):
    """
    Rebuild cls.available_recognizers from the "engines" search path.

    Every "*.py" file of every existing search directory is loaded as a
    module, except "__init__.py" and "setup.py" files. A module is
    registered under module.RECOGNIZER_CLASS.RECOGNIZER_NAME; modules which
    do not provide these attributes are skipped.
    """
    registry = SortedDict()
    cls.available_recognizers = registry

    skipped_suffixes = ("__init__.py", "setup.py")

    for engine_dir in cls._get_search_path("engines"):
        if os.path.exists(engine_dir):
            for source_path in glob.glob(os.path.join(engine_dir, "*.py")):
                if source_path.endswith(skipped_suffixes):
                    continue

                # module name is the file name without ".py", suffixed
                # with "recognizer"
                base_name = os.path.basename(source_path).replace(".py", "")
                module = imp.load_source(base_name + "recognizer",
                                         source_path)

                try:
                    recognizer_class = module.RECOGNIZER_CLASS
                    registry[recognizer_class.RECOGNIZER_NAME] = \
                        recognizer_class
                except AttributeError:
                    # not a recognizer module: nothing to register
                    continue
def get_char_dict(self, directory, corpora):
    """
    Load the characters found under a directory, grouped by character code.

    Every entry matching directory/*/* is considered, and the name of its
    parent directory is taken as its corpus name. Sub-directories are
    loaded with CharacterCollection.from_character_directory. Other entries
    whose path contains ".charcol" are read as character collection files,
    with gzip or bz2 enabled when the path ends with ".gz" or ".bz2".

    directory: root directory
    corpora: corpora list to restrict to

    Returns a SortedDict. Keys are character codes (ord() of the value
    returned by each character's get_unicode()). Values are lists of the
    characters returned by CharacterCollection.get_characters.
    """
    charcol = CharacterCollection()

    for path in glob.glob(os.path.join(directory, "*", "*")):
        # Derive the corpus name with os.path instead of splitting on "/",
        # so it is also correct where the path separator is not "/".
        corpus_name = os.path.basename(os.path.dirname(path))

        # exclude data which are not in the wanted corpora
        if corpus_name not in corpora:
            continue

        if os.path.isdir(path):
            self.print_verbose("Loading dir %s" % path)
            charcol += CharacterCollection.from_character_directory(path)
        elif ".charcol" in path:
            self.print_verbose("Loading charcol %s" % path)
            loaded = CharacterCollection()
            loaded.read(path,
                        gzip=path.endswith(".gz"),
                        bz2=path.endswith(".bz2"))
            charcol += loaded

    self.print_verbose("Grouping characters together...")
    dic = SortedDict()
    for set_name in charcol.get_set_list():
        for char in charcol.get_characters(set_name):
            charcode = ord(char.get_unicode())
            if charcode not in dic:
                dic[charcode] = []
            dic[charcode].append(char)

    return dic
def _update_set_ids(self):
    """
    Rebuild self._SETIDS from the character_sets table.

    self._SETIDS is replaced by a new SortedDict. Each row obtained from
    self._efa for the query below adds one entry: the UTF-8 encoded
    row['name'] mapped to row['setid'], in the order the rows come back.
    """
    set_ids = SortedDict()
    # publish the new mapping before running the query
    self._SETIDS = set_ids

    query = "SELECT * FROM character_sets ORDER BY setid"
    for record in self._efa(query):
        set_id = record['setid']
        encoded_name = record['name'].encode("utf8")
        set_ids[encoded_name] = set_id