def __init__(self, all_words, lang, max_words_on_page=1000, alphabet=None):
    """Store the builder settings and precompute the index layout.

    Args:
        all_words: words to index; stored as ``self.all_words``.
        lang: language code; stored as ``self.lang``.
        max_words_on_page: stored as ``self.max_words_on_page``.
        alphabet: letters of the index; when falsy, the result of
            ``russian_alphabet()`` is used instead.

    Side effects:
        Prints a start and a finish message prefixed with ``dt()`` and
        switches the process locale (``LC_ALL``) to ``"russian"``.
    """
    print('%s %s' % (dt(), 'IndexBuilder: creating...'))

    self.lang = lang
    # An empty or missing alphabet falls back to the Russian one.
    self.alphabet = alphabet or russian_alphabet()
    locale.setlocale(locale.LC_ALL, "russian")

    self.all_words = all_words
    self.max_words_on_page = max_words_on_page
    self.need_div = True

    # Every attribute above is assigned before the derived data is built.
    self._build_init_data()
    self.separate_pages = self._get_separate_pages()

    print('%s %s' % (dt(), 'IndexBuilder: created - ok'))
def __init__(self, all_words, lang, max_words_on_page=1000, alphabet=None,
             header=None, need_div=True, words_details=None):
    """Store the builder settings and precompute the index layout.

    Args:
        all_words: words to index; stored as ``self.all_words``.
        lang: language code; stored as ``self.lang``.
        max_words_on_page: stored as ``self.max_words_on_page``.
        alphabet: letters of the index; when falsy, the result of
            ``russian_alphabet()`` is used instead.
        header: index heading; a falsy value is replaced by u'Индекс'.
        need_div: stored as ``self.need_div``.
        words_details: stored as ``self.words_details``.

    Side effects:
        Prints a start and a finish message prefixed with ``dt()`` and
        switches the process locale (``LC_ALL``) to
        ``settings.LOCALE_FOR_INDEX_BUILDER``.
    """
    print('%s %s' % (dt(), 'IndexBuilder: creating...'))

    self.lang = lang
    self.header = header or u'Индекс'
    # An empty or missing alphabet falls back to the Russian one.
    self.alphabet = alphabet or russian_alphabet()
    locale.setlocale(locale.LC_ALL, settings.LOCALE_FOR_INDEX_BUILDER)

    self.all_words = all_words
    self.words_details = words_details
    self.max_words_on_page = max_words_on_page
    self.need_div = need_div

    # Every attribute above is assigned before the derived data is built.
    self._build_init_data()
    self.separate_pages = self._get_separate_pages()

    print('%s %s' % (dt(), 'IndexBuilder: created - ok'))
def create_index(words, path, desc, debug=True, push=False, use_other=True,
                 alphabet=None, force_wiki_title=None, max_words_on_page=None,
                 force_letters=None, header=None, force_wiki_prefix=None,
                 wiki_save_only_total=False, need_div=True,
                 words_details=None):
    """Build the index pages for `words`, then dump and/or publish them.

    One page is produced per letter and one per entry of
    ``builder.separate_pages``; each page body comes from ``builder.get``.

    Args:
        words: words handed to ``IndexBuilder``; ``len(words)`` is reported
            on the totals page.
        path: '/'-separated location, used both below ``PAGES_DIR`` and in
            the wiki title.
        desc: description passed to ``save_wiki_page`` for every page.
        debug: when true, create the folders for ``path`` under
            ``PAGES_DIR`` and pass every page to ``debug_write``.
        push: when true, save pages through ``save_wiki_page``.
        use_other: when true, also emit the '-' page; when false, separate
            pages whose name starts with '-' are skipped.
        alphabet: index letters; defaults to ``russian_alphabet()``.
        force_wiki_title: replaces the "<prefix>/<path>" part of the title.
        max_words_on_page: forwarded to ``IndexBuilder`` (2000 when falsy).
        force_letters: letters to emit instead of ``alphabet``.
        header: forwarded to ``IndexBuilder``; when truthy and `push` is
            set, a totals page named u"Итого" is saved as well.
        force_wiki_prefix: replaces the default wiki title prefix.
        wiki_save_only_total: when true, only the totals page is saved to
            the wiki (debug output is unaffected).
        need_div: forwarded to ``IndexBuilder``.
        words_details: forwarded to ``IndexBuilder``.
    """
    if not alphabet:
        alphabet = russian_alphabet()
    builder = IndexBuilder(words, 'ru', alphabet=alphabet,
                           max_words_on_page=max_words_on_page or 2000,
                           header=header, need_div=need_div,
                           words_details=words_details)

    if debug:
        # Create PAGES_DIR/<path> one level at a time.
        subpath = PAGES_DIR
        for folder in path.split('/'):
            subpath = join(subpath, folder)
            if not exists(subpath):
                mkdir(subpath)
    file_path = join(PAGES_DIR, path)

    wiki_prefix = force_wiki_prefix or u"Участник:Vitalik"

    def wiki_title_for(page_name):
        # "<root>/<page_name>", where root is the forced title when given.
        root = force_wiki_title or "%s/%s" % (wiki_prefix, path)
        return "%s/%s" % (root, page_name)

    def emit(page_name):
        # Render one page, then dump and/or publish it as requested.
        content = builder.get(page_name)
        if debug:
            debug_write(file_path, page_name, content)
        if push and not wiki_save_only_total:
            save_wiki_page(wiki_title_for(page_name), content, desc)

    # Work on a private copy: `letters += '-'` on a list argument would
    # extend the caller's `alphabet` / `force_letters` in place.
    letters = list(force_letters or alphabet)
    if use_other:
        letters.append('-')
    for letter in letters:
        emit(letter)

    for pages in builder.separate_pages.values():
        # Shorter page names first.
        for page in sorted(pages, key=len):
            if not use_other and page[0] == '-':
                continue
            emit(page)

    if push and header:
        # NOTE(review): template text is reproduced exactly as found; wiki
        # headings normally sit on their own line, so confirm the intended
        # line breaks against the original file.
        content = u"""{{Алфавит|%s|lang=ru}} == Итого == Итого в индексе "%s": '''<onlyinclude>%s</onlyinclude>''' статей """ % (header, header, len(words))
        save_wiki_page(wiki_title_for(u"Итого"), content,
                       u'Итого: %s статей' % len(words))
def create_index(words, path, desc, debug=True, push=False, use_other=True,
                 alphabet=None, force_wiki_title=None):
    """Build the index pages for `words`, then dump and/or publish them.

    One page is produced per letter and one per entry of
    ``builder.separate_pages``; each page body comes from ``builder.get``.
    Every letter and page name is printed as it is reached.

    Args:
        words: words handed to ``IndexBuilder``.
        path: '/'-separated location, used both below ``PAGES_DIR`` and in
            the wiki title.
        desc: description passed to ``save_wiki_page`` for every page.
        debug: when true, create the folders for ``path`` under
            ``PAGES_DIR`` and pass every page to ``debug_write``.
        push: when true, save every page through ``save_wiki_page``.
        use_other: when true, also emit the '-' page; when false, separate
            pages whose name starts with '-' are skipped.
        alphabet: index letters; defaults to ``russian_alphabet()``.
        force_wiki_title: replaces the "<prefix>/<path>" part of the title.
    """
    if not alphabet:
        alphabet = russian_alphabet()
    builder = IndexBuilder(words, alphabet=alphabet)

    if debug:
        # Create PAGES_DIR/<path> one level at a time.
        subpath = PAGES_DIR
        for folder in path.split('/'):
            subpath = join(subpath, folder)
            if not exists(subpath):
                mkdir(subpath)
    file_path = join(PAGES_DIR, path)

    wiki_prefix = u"Участник:Vitalik7"

    def wiki_title_for(page_name):
        # "<root>/<page_name>", where root is the forced title when given.
        root = force_wiki_title or "%s/%s" % (wiki_prefix, path)
        return "%s/%s" % (root, page_name)

    def emit(page_name):
        # Render one page, then dump and/or publish it as requested.
        content = builder.get(page_name)
        if debug:
            debug_write(file_path, page_name, content)
        if push:
            save_wiki_page(wiki_title_for(page_name), content, desc)

    # Work on a private copy: `letters += '-'` on a list argument would
    # extend the caller's `alphabet` in place.
    letters = list(alphabet)
    if use_other:
        letters.append('-')
    for letter in letters:
        print(letter)
        emit(letter)

    for pages in builder.separate_pages.values():
        # Shorter page names first.
        for page in sorted(pages, key=len):
            # Progress output comes before the filter, so skipped pages
            # are printed too.
            print(page)
            if not use_other and page[0] == '-':
                continue
            emit(page)
if force_wiki_title: wiki_title = force_wiki_title else: wiki_title = "%s/%s" % (wiki_prefix, path) wiki_title = "%s/%s" % (wiki_title, title_page) save_wiki_page(wiki_title, content, desc) # sleep(1) def load_words_from_file(): words = load_lines(ru_index_filename) data = dict() for word in words: word = word.decode('utf-8') if word[0] == '-' or word[-1] == '-' or ' ' in word: continue if word[-1] == '.' or word.upper() == word: continue key = word[::-1] data[key] = word return data LoadRuWords().run() data = load_words_from_file() create_index(data, u"Индекс/Обратный словарь русского языка", u"Обновление обратного индекса", alphabet=russian_alphabet(), push=True)