def get_dummy_index(self):
    """Build an ``Index`` pre-populated with a fixed set of registrations.

    The pair ``('DaDaDa', 'd01')`` is registered twice, and ``'DumDiDum'``
    is registered for two different identifiers (``'d02'`` and ``'d03'``).

    Returns:
        The populated ``Index`` instance.
    """
    # (token, identifier) pairs, registered in exactly this order.
    registrations = (
        ('DaDaDa', 'd01'),
        ('DaDaDa', 'd01'),
        ('DumDiDum', 'd02'),
        ('DumDiDum', 'd03'),
    )

    index = Index()
    for token, identifier in registrations:
        index.register_Token(token, identifier)
    return index
class Indexer:
    """Builds an inverted index from the page objects handed to it."""

    def __init__(self):
        """Start with an empty index and no pages marked as indexed."""
        self.__index = Index()
        self.__already_indexed_pages = set()

    def index_pages(self, pages):
        """Index every page contained in ``pages``.

        Args:
            pages: A ``Pages`` instance; it is iterated to obtain the pages.

        Raises:
            TypeError: If ``pages`` is not an instance of ``Pages``.
        """
        if not isinstance(pages, Pages):
            raise TypeError('pages must be an instance of Pages')

        for single_page in pages:
            self.__index_page(single_page)

    def __index_page(self, page):
        """Register all tokens of one page, skipping pages already indexed."""
        title = page.title

        # A page is identified by its title; never index the same title twice.
        if title in self.__already_indexed_pages:
            return

        for token in TokenLexer(page).tokens():
            self.__index.register_Token(token, title)

        # Remember the title only after all tokens have been registered.
        self.__already_indexed_pages.add(title)

    @property
    def index(self):
        """The inverted index built so far."""
        return self.__index
def index_data(self, page_index):
    """Return the combined data for the index page ``page_index``.

    When ``Options().caching`` is enabled, a result already stored under the
    key ``'/index/<page_index>'`` is returned directly, and a freshly
    computed result is stored under that key before being returned.

    Args:
        page_index: Identifier of the index page; it is formatted into the
            cache key and passed on to ``Index().data``.

    Returns:
        The result of combining ``self.common_data`` with the page data.
    """
    cache_key = '/index/{0}'.format(page_index)

    # Serve from the cache when caching is on and the key is present.
    if Options().caching and DataCacher().cached_already(cache_key):
        return DataCacher().get_cached(cache_key)

    shared = self.common_data

    # Drafts are only part of the listing when explicitly requested.
    posts = self.posts_files if Options().include_drafts else self.posts_published

    # The second element of the pair is ignored; per the original note it is
    # the introduction content, which is not needed yet.
    page_data, _ = Index().data(page_index, posts)

    result = self.__combine(shared, page_data)

    if Options().caching:
        DataCacher().cache(cache_key, result)

    return result
class Indexer:
    """Builds an inverted index over a document collection, skipping stopwords."""

    def __init__(self, stopwords):
        """Create an empty index and the preprocessor that loads documents.

        Args:
            stopwords: Container of words to exclude from the index; it is
                only used for membership tests (``word not in stopwords``).
        """
        self.__index = Index()
        self.__preprocessor = Preprocessor()
        self.__sw = stopwords

    def __process_word(self, word):
        """Return ``word`` unchanged.

        Every indexed word is routed through this method, so it is the single
        place to add per-word processing later.
        """
        return word

    def load_inverted_index(self):
        """Load the collection and populate the index from its documents.

        For each document, every word that is not a stopword is added to the
        inverted file and to the document's vector; afterwards the document
        frequencies of that document are normalized.
        """
        documents = self.__preprocessor.load_collection()
        for doc in documents:
            for word in doc.get_content():
                if word not in self.__sw:
                    w = self.__process_word(word)
                    self.__index.add_tf_inverted_file(w, doc.get_name())
                    self.__index.update_file_vector(w, doc.get_name())
            # Normalize once per document, after all of its words were added.
            self.__index.normalize_doc_freq(doc.get_name())

    def get_index(self):
        """Return the underlying ``Index`` instance."""
        return self.__index
def __init__(self, stopwords):
    """Store the stopwords and create a fresh index and preprocessor.

    Args:
        stopwords: Stored as given on the instance; how it is used is not
            visible in this block.
    """
    self.__sw = stopwords
    self.__index = Index()
    self.__preprocessor = Preprocessor()
def __init__(self):
    """Start with no pages marked as indexed and a fresh ``Index``."""
    self.__already_indexed_pages = set()
    self.__index = Index()
def index_highlight_posts(self):
    """Return what ``__get_data`` yields for the index's highlight posts.

    Delegates to ``__get_data`` with the label ``'index_highlight_posts'``,
    a new ``Index`` instance and the name ``'highlight_posts'``.
    """
    source = Index()
    return self.__get_data('index_highlight_posts', source, 'highlight_posts')
def index_spotlight_posts(self):
    """Return what ``__get_data`` yields for the index's spotlight posts.

    Delegates to ``__get_data`` with the label ``'index_spotlight_posts'``,
    a new ``Index`` instance and the name ``'spotlight_posts'``.
    """
    source = Index()
    return self.__get_data('index_spotlight_posts', source, 'spotlight_posts')
def index_max_posts(self):
    """Return what ``__get_data`` yields for the index's max posts setting.

    Delegates to ``__get_data`` with the label ``'index_max_posts'``,
    a new ``Index`` instance and the name ``'max_posts'``.
    """
    source = Index()
    return self.__get_data('index_max_posts', source, 'max_posts')
def index_footer_menu(self):
    """Return what ``__get_data`` yields for the index's footer menu.

    Delegates to ``__get_data`` with the label ``'index_footer_menu'``,
    a new ``Index`` instance and the name ``'footer_menu'``.
    """
    source = Index()
    return self.__get_data('index_footer_menu', source, 'footer_menu')
def index_main_menu(self):
    """Return what ``__get_data`` yields for the index's main menu.

    Delegates to ``__get_data`` with the label ``'index_main_menu'``,
    a new ``Index`` instance and the name ``'main_menu'``.
    """
    source = Index()
    return self.__get_data('index_main_menu', source, 'main_menu')