def load(self) -> None:
    """Load the inverted index, entity list and id->name mapping from disk.

    Optional artifacts (who-entities, frequency dictionary) are loaded only
    when their filenames are configured.
    """
    base = self.load_path
    self.inverted_index = load_pickle(base / self.inverted_index_filename)
    self.entities_list = load_pickle(base / self.entities_list_filename)
    self.q2name = load_pickle(base / self.q2name_filename)
    if self.who_entities_filename:
        self.who_entities = load_pickle(base / self.who_entities_filename)
    if self.freq_dict_filename:
        self.load_freq_dict(self.freq_dict_filename)
def load(self) -> None:
    """Load word/entity artifacts and, unless the vectorizer will be fitted,
    the pickled vectorizer and the faiss index (optionally moved to GPU).
    """
    base = self.load_path
    self.word_to_idlist = load_pickle(base / self.word_to_idlist_filename)
    self.entities_list = load_pickle(base / self.entities_list_filename)
    self.word_list = list(self.word_to_idlist.keys())
    self.entities_ranking_dict = load_pickle(base / self.entities_ranking_filename)
    if not self.fit_vectorizer:
        self.vectorizer = load_pickle(base / self.vectorizer_filename)
        self.faiss_index = faiss.read_index(str(expand_path(self.faiss_index_filename)))
        if self.use_gpu:
            # Move the CPU index onto GPU device 0.
            gpu_resources = faiss.StandardGpuResources()
            self.faiss_index = faiss.index_cpu_to_gpu(gpu_resources, 0, self.faiss_index)
def __init__(self, data_dir=None, *args, **kwargs):
    """Build the dictionary pickles under ``data_dir`` on first use, then load them.

    Args:
        data_dir: root directory for dictionary files; defaults to ``paths.USR_PATH``.
        *args: first positional argument (if any) supplies the dictionary name.
        **kwargs: may carry ``dictionary_name``; also forwarded to ``_get_source``.
    """
    if data_dir is None:
        data_dir = paths.USR_PATH
    data_dir = Path(data_dir)
    if self.dict_name is None:
        self.dict_name = args[0] if args else kwargs.get('dictionary_name', 'dictionary')
    data_dir = data_dir / self.dict_name

    alphabet_path = data_dir / 'alphabet.pkl'
    words_path = data_dir / 'words.pkl'
    words_trie_path = data_dir / 'words_trie.pkl'

    if not is_done(data_dir):
        print('Trying to build a dictionary in {}'.format(data_dir), file=sys.stderr)
        # Wipe any stale partial build before rebuilding from scratch.
        if data_dir.is_dir():
            shutil.rmtree(data_dir)
        data_dir.mkdir(parents=True)

        source_words = self._get_source(data_dir, *args, **kwargs)
        words = {self._normalize(raw) for raw in source_words}

        alphabet = {symbol for word in words for symbol in word}
        # Drop the word-boundary markers before persisting the alphabet
        # (presumably added by _normalize — TODO confirm).
        alphabet.remove('⟬')
        alphabet.remove('⟭')
        save_pickle(alphabet, alphabet_path)
        save_pickle(words, words_path)

        # Prefix trie: each prefix maps to the sorted set of one-char-longer prefixes.
        trie = defaultdict(set)
        for word in words:
            for cut in range(len(word)):
                trie[word[:cut]].add(word[:cut + 1])
            trie[word] = set()
        save_pickle({prefix: sorted(nexts) for prefix, nexts in trie.items()}, words_trie_path)

        mark_done(data_dir)
        print('built', file=sys.stderr)
    else:
        print('Loading a dictionary from {}'.format(data_dir), file=sys.stderr)

    # Always read back from the (possibly just-written) pickles.
    self.alphabet = load_pickle(alphabet_path)
    self.words_set = load_pickle(words_path)
    self.words_trie = load_pickle(words_trie_path)
def __init__(self, data_dir=None, *args, **kwargs):
    """Build (or load cached) dictionary artifacts under ``data_dir``.

    Args:
        data_dir: root directory for dictionary files; defaults to ``paths.USR_PATH``.
        *args: first positional argument (if any) is used as the dictionary name.
        **kwargs: may supply ``dictionary_name``; also forwarded to ``_get_source``.
    """
    if data_dir is None:
        data_dir = paths.USR_PATH
    data_dir = Path(data_dir)
    if self.dict_name is None:
        self.dict_name = args[0] if args else kwargs.get('dictionary_name', 'dictionary')
    data_dir = data_dir / self.dict_name
    alphabet_path = data_dir / 'alphabet.pkl'
    words_path = data_dir / 'words.pkl'
    words_trie_path = data_dir / 'words_trie.pkl'
    if not is_done(data_dir):
        print('Trying to build a dictionary in {}'.format(data_dir), file=sys.stderr)
        # Remove any stale partial build before rebuilding from scratch.
        if data_dir.is_dir():
            shutil.rmtree(data_dir)
        data_dir.mkdir(parents=True)
        words = self._get_source(data_dir, *args, **kwargs)
        words = {self._normalize(word) for word in words}
        alphabet = {c for w in words for c in w}
        # Strip the word-boundary markers from the alphabet (raises KeyError
        # if absent — presumably _normalize always adds them; TODO confirm).
        alphabet.remove('⟬')
        alphabet.remove('⟭')
        save_pickle(alphabet, alphabet_path)
        save_pickle(words, words_path)
        # Prefix trie: every prefix maps to the set of one-char-longer prefixes;
        # full words map to the empty set (no extensions).
        words_trie = defaultdict(set)
        for word in words:
            for i in range(len(word)):
                words_trie[word[:i]].add(word[:i+1])
            words_trie[word] = set()
        words_trie = {k: sorted(v) for k, v in words_trie.items()}
        save_pickle(words_trie, words_trie_path)
        mark_done(data_dir)
        print('built', file=sys.stderr)
    else:
        print('Loading a dictionary from {}'.format(data_dir), file=sys.stderr)
    # Always load from the (possibly just-written) pickles.
    self.alphabet = load_pickle(alphabet_path)
    self.words_set = load_pickle(words_path)
    self.words_trie = load_pickle(words_trie_path)
def load(self):
    """Load model from file.

    Returns:
        The unpickled model object.

    Raises:
        FileNotFoundError: if there is no model at ``self.ser_path``.
    """
    try:
        return load_pickle(self.ser_path)
    except FileNotFoundError as e:
        # Bug fix: the original ``raise (e, "...")`` raised a tuple, which is a
        # TypeError in Python 3 ("exceptions must derive from BaseException").
        # Re-raise a proper exception, chained to the original for context.
        raise FileNotFoundError(
            "There is no model in the specified path: {}".format(self.ser_path)) from e
def __init__(self, data_dir: [Path, str] = '', *args, dictionary_name: str = 'dictionary', **kwargs):
    """Create the dictionary, building its pickled artifacts on first use.

    Args:
        data_dir: root directory for the dictionary files.
        dictionary_name: subdirectory name for this dictionary.
        **kwargs: forwarded to ``_get_source``.
    """
    data_dir = expand_path(data_dir) / dictionary_name

    alphabet_path = data_dir / 'alphabet.pkl'
    words_path = data_dir / 'words.pkl'
    words_trie_path = data_dir / 'words_trie.pkl'

    if not is_done(data_dir):
        log.info('Trying to build a dictionary in {}'.format(data_dir))
        # Discard any stale partial build first.
        if data_dir.is_dir():
            shutil.rmtree(str(data_dir))
        data_dir.mkdir(parents=True)

        raw_words = self._get_source(data_dir, *args, **kwargs)
        words = {self._normalize(raw) for raw in raw_words}

        alphabet = {character for word in words for character in word}
        # Remove the word-boundary markers before persisting the alphabet.
        alphabet.remove('⟬')
        alphabet.remove('⟭')
        save_pickle(alphabet, alphabet_path)
        save_pickle(words, words_path)

        # prefix -> sorted list of prefixes one character longer
        trie = defaultdict(set)
        for word in words:
            for cut in range(len(word)):
                trie[word[:cut]].add(word[:cut + 1])
            trie[word] = set()
        save_pickle({prefix: sorted(more) for prefix, more in trie.items()}, words_trie_path)

        mark_done(data_dir)
        log.info('built')
    else:
        log.info('Loading a dictionary from {}'.format(data_dir))

    self.alphabet = load_pickle(alphabet_path)
    self.words_set = load_pickle(words_path)
    self.words_trie = load_pickle(words_trie_path)
def load(self, **kwargs) -> None:
    """Load classifier parameters.

    Appends the unpickled contents of every file in ``self.load_path``
    to ``self.ec_data``.

    Raises:
        FileNotFoundError: if any path in ``self.load_path`` is not a file.
    """
    log.info(f"Loading model from {self.load_path}")
    for path in self.load_path:
        if Path.is_file(path):
            self.ec_data += load_pickle(path)
        else:
            # Bug fix: the bare ``raise FileNotFoundError`` carried no message;
            # include the offending path so the failure is actionable.
            raise FileNotFoundError(f"File {path} does not exist")
    log.info(f"Loaded items {len(self.ec_data)}")
def load(self, **kwargs) -> None:
    """Load classifier parameters"""
    log.info(f"Loading model from {self.load_path}")
    for path in self.load_path:
        # Missing files are skipped (best-effort load), only logged.
        if not is_file_exist(path):
            log.info(f"File {path} does not exist")
            continue
        self.ec_data += load_pickle(path)
    log.info(f"Loaded items {len(self.ec_data)}")
def __init__(self, data_dir: [Path, str]='', *args, dictionary_name: str='dictionary', **kwargs):
    """Build (or load cached) dictionary artifacts under ``data_dir/dictionary_name``.

    Args:
        data_dir: root directory for the dictionary files.
        dictionary_name: subdirectory name for this dictionary.
        **kwargs: forwarded to ``_get_source``.
    """
    data_dir = expand_path(data_dir) / dictionary_name
    alphabet_path = data_dir / 'alphabet.pkl'
    words_path = data_dir / 'words.pkl'
    words_trie_path = data_dir / 'words_trie.pkl'
    if not is_done(data_dir):
        log.info('Trying to build a dictionary in {}'.format(data_dir))
        # Remove any stale partial build before rebuilding from scratch.
        if data_dir.is_dir():
            shutil.rmtree(str(data_dir))
        data_dir.mkdir(parents=True)
        words = self._get_source(data_dir, *args, **kwargs)
        words = {self._normalize(word) for word in words}
        alphabet = {c for w in words for c in w}
        # Strip the word-boundary markers from the alphabet (raises KeyError
        # if absent — presumably _normalize always adds them; TODO confirm).
        alphabet.remove('⟬')
        alphabet.remove('⟭')
        save_pickle(alphabet, alphabet_path)
        save_pickle(words, words_path)
        # Prefix trie: every prefix maps to the set of one-char-longer prefixes;
        # full words map to the empty set (no extensions).
        words_trie = defaultdict(set)
        for word in words:
            for i in range(len(word)):
                words_trie[word[:i]].add(word[:i+1])
            words_trie[word] = set()
        words_trie = {k: sorted(v) for k, v in words_trie.items()}
        save_pickle(words_trie, words_trie_path)
        mark_done(data_dir)
        log.info('built')
    else:
        log.info('Loading a dictionary from {}'.format(data_dir))
    # Always load from the (possibly just-written) pickles.
    self.alphabet = load_pickle(alphabet_path)
    self.words_set = load_pickle(words_path)
    self.words_trie = load_pickle(words_trie_path)
def __init__(self, wiki_filename: str, file_format: str = "hdt", lang: str = "@en", **kwargs) -> None:
    """
    Args:
        wiki_filename: file with Wikidata
        file_format: format of Wikidata file ("hdt" or "pickle")
        lang: Russian or English language
        **kwargs: unused, accepted for config compatibility

    Raises:
        ValueError: if ``file_format`` is neither "hdt" nor "pickle".
    """
    self.description_rel = "http://schema.org/description"
    self.file_format = file_format
    self.wiki_filename = str(expand_path(wiki_filename))
    self.lang = lang
    if self.file_format == "hdt":
        self.document = HDTDocument(self.wiki_filename)
    elif self.file_format == "pickle":
        self.document = load_pickle(self.wiki_filename)
    else:
        raise ValueError("Unsupported file format")
def load(self) -> None:
    """Restore the pickled TF-IDF vectorizer from ``self.load_path``."""
    logger.info("Loading tfidf_vectorizer from {}".format(self.load_path))
    vectorizer_path = expand_path(self.load_path)
    self.vectorizer = load_pickle(vectorizer_path)
def load(self) -> None:
    """Load classifier parameters"""
    log.info("Loading from {}".format(self.load_path))
    model_path = expand_path(self.load_path)
    # The pickle holds a (data, features) pair.
    self.ec_data, self.x_train_features = load_pickle(model_path)
def load(self) -> None:
    """Load model"""
    logger.info("Loading tfidf_vectorizer from {}".format(self.load_path))
    vectorizer_path = expand_path(self.load_path)
    self.vectorizer = load_pickle(vectorizer_path)
    # Expose the fitted vocabulary as a token -> column-index mapping.
    self.token2idx = self.vectorizer.vocabulary_
def load(self) -> None:
    """Restore the pickled classifier from ``self.load_path``."""
    logger.info("Loading classifier from {}".format(self.load_path))
    classifier_path = expand_path(self.load_path)
    self.clf = load_pickle(classifier_path)
def __init__(self, wiki_first_par_filename, entities_num=2, **kwargs):
    """Load the pickled first-paragraph mapping and remember how many entities to use.

    Args:
        wiki_first_par_filename: path to the pickled mapping.
        entities_num: number of entities to consider.
        **kwargs: unused, accepted for config compatibility.
    """
    mapping_path = str(expand_path(wiki_first_par_filename))
    self.wiki_first_par = load_pickle(mapping_path)
    self.entities_num = entities_num
def __init__(self, q_to_page_filename, entities_num=5, **kwargs):
    """Load the pickled entity-id -> page mapping and remember the entity limit.

    Args:
        q_to_page_filename: path to the pickled mapping.
        entities_num: number of entities to consider.
        **kwargs: unused, accepted for config compatibility.
    """
    mapping_path = str(expand_path(q_to_page_filename))
    self.q_to_page = load_pickle(mapping_path)
    self.entities_num = entities_num
def load(self) -> None:
    """Load the id->name mapping, optional relations mapping and the wikidata dump."""
    base = self.load_path
    self.q_to_name = load_pickle(base / self.q2name_filename)
    # Relations mapping is optional — only configured deployments have it.
    if self._relations_filename is not None:
        self._relations_mapping = load_pickle(base / self._relations_filename)
    self.wikidata = load_pickle(base / self.wiki_filename)
def load(self) -> None:
    """Load classifier parameters"""
    logger.info("Loading faq_logreg_model from {}".format(self.load_path))
    model_path = expand_path(self.load_path)
    self.logreg = load_pickle(model_path)
def load(self) -> None:
    """Load classifier parameters"""
    logger.info("Loading faq_model from {}".format(self.load_path))
    model_path = expand_path(self.load_path)
    # The pickle holds a (features, labels) pair.
    self.x_train_features, self.y_train = load_pickle(model_path)
def load(self) -> None:
    """Load classifier parameters"""
    logger.info("Loading faq_model from {}".format(self.load_path))
    features, labels = load_pickle(expand_path(self.load_path))
    self.x_train_features = features
    self.y_train = labels
def load(self) -> None:
    """Restore the relation-id -> name mapping from the load path."""
    mapping_path = self.load_path / self.rel_q2name_filename
    self.rel_q2name = load_pickle(mapping_path)
def load(self) -> None:
    """Restore precomputed training features and labels from the pickle."""
    logger.info("Loading faq_model from {}".format(self.load_path))
    serialized = load_pickle(expand_path(self.load_path))
    self.x_train_features, self.y_train = serialized
def load(self) -> None:
    """Load the inverted index, entity list and id->name mapping."""
    base = self.load_path
    self.inverted_index = load_pickle(base / self.inverted_index_filename)
    self.entities_list = load_pickle(base / self.entities_list_filename)
    self.q2name = load_pickle(base / self.q2name_filename)
def load(self) -> None:
    """Load TF-IDF vectorizer"""
    vectorizer_path = expand_path(self.load_path)
    logger.info("Loading tfidf_vectorizer from {}".format(vectorizer_path))
    self.vectorizer = load_pickle(vectorizer_path)