def generate_red_links_index(): ignore_words_content = get_wiki_page_content(u'Участник:Vitalik/Индекс/Красные ссылки/Игнорируемые слова') ignore_words = list() for line in ignore_words_content.split('\n'): m = re.match('^\* \[\[(.*)\]\]$', line) if not m: print u'ERROR in ignore_words: %s' % line ignore_words.append(m.group(1).encode('utf8')) # print '\n'.join(ignore_words) # exit() page_names = [ u'Участник:Vitalik/Индекс/Красные ссылки/Дополнительные источники/Cinematique', u'Участник:Vitalik/Индекс/Красные ссылки/Дополнительные источники/Cinematique/Недостающие глаголы из причастий', ] cin_words = list() for page_name in page_names: cin_words_content = get_wiki_page_content(page_name) for line in cin_words_content.split('\n'): m = re.match('^[*#] \[\[(.*)\]\]$', line) if not m: print u'ERROR in cin_words: %s' % line cin_words.append(m.group(1).encode('utf8')) # print '\n'.join(cin_words) # exit() index_words = load_lines(join(settings.DATA_PATH, 'wikt_words', 'ru+redirects.txt')) dict_words = load_from_dictionaries() red_words = list((set(dict_words) | set(cin_words)) - set(index_words) - set(ignore_words)) print "Red count: ", len(red_words) # exit() # bos_words = load_from_dictionaries(['bos_barhudarov_filtering_words.txt']) # new_words = list(set(bos_words) - set(red_words) - set(index_words)) # for word in sorted(new_words): # print word # exit() # save_lines(join(settings.DATA_PATH, 'words_red_a.txt'), red_words) save_lines(join(settings.FILES_PATH, 'reports', 'red_links_ru'), sorted(red_words)) # create_index(red_words, u'Индекс/Красные ссылки (без подстраниц)', create_index(red_words, u'Индекс/Красные ссылки', desc=u'Обновление списка красных ссылок', push=True, debug=False, header=u'Красные ссылки', )
def action(self, page, **kwargs): content = kwargs['content'] parts = re.findall('(^|\n)(=[^=\n]+=)\n', content) for part in parts: found = part[1] if found in [u'= Буква (латиница) =', u'= Буква (кириллица) =']: continue # print "* [[%s]]: <code><nowiki>%s</nowiki></code>" % (page.title, found) m = re.match(u'^= *\{\{-(?P<lang>[-a-z]+|Праславянский)-(?P<remove>\|([^}]+|\{\{PAGENAME\}\}|))?\}\} *=$', found, re.IGNORECASE) if not m: print found if m: remove = m.group('remove') # if remove: # print page.title, remove if remove == '|nocat': continue lang = m.group('lang') # print "* %s: %s" % (page.title, lang) if lang != 'ru': continue old_header = m.group(0) new_header = "= {{-%s-}} =" % lang if old_header == new_header: continue self.changed += 1 print dt(), 'changed:', self.changed wiki_content = get_wiki_page_content(page.title) new_wiki_content = wiki_content.replace(old_header, new_header) save_wiki_page(page.title, new_wiki_content, "викификация заголовка первого уровня", wait=5)
def action(self, page, **kwargs): # print page.title super(RemoveBad, self).action(page, **kwargs) title = page.title if ':' in title: return content = page.content if u"{{{" in content: print title print "%s§ion=2" % get_edit_page_url(title) return # if u"основа1={{{" in content: if u"основа={{{" in content: print title print "%s§ion=2" % get_edit_page_url(title) try: old_content = content old_content = get_wiki_page_content(title) except IsRedirectPage: return # new_content = re.sub(u"\|основа\d?=\{\{\{\d\|?\}\}\}(́?\{\{\{\d\|?\}\}\})?\n", '', old_content) new_content = re.sub(u"\|основа=\{\{\{\d\|([А-Яа-я]+)\}\}\}\n", u'|основа=\\1\n', old_content) new_content = re.sub(u"\|слоги=\{\{\{\d\|([А-Яа-я]+)\}\}\}\n", u'|слоги={{по-слогам|\\1}}\n', new_content) if old_content == new_content: # content doesn't change return desc = u'Удаление "странных" параметров' save_wiki_page(title, new_content, desc, wait=5)
def tpl_action(self, page, old_tpl, title, morph, lang, params, tail, pre_tail): new_tpl = self.make_changes(page, old_tpl, title, morph, lang, params, tail, pre_tail) if new_tpl == old_tpl: # tpl doesn't change return print 'changing' # return try: if self.debug: old_content = page.content else: old_content = get_wiki_page_content(page.title) except IsRedirectPage: return new_content = old_content.replace(old_tpl, new_tpl) if old_content == new_content: # content doesn't change print "It's strange: content doesn't change in '%s'!" % \ page.title.encode('cp1251') return if self.debug: print '-' * 80 print new_content else: save_wiki_page(page.title, new_content, self.desc, wait=5)
def action(self, page, **kwargs): # print '=' * 80 # print dt(), page.title content = kwargs['content'] m = re.search(u'=== Смотреть также ===\n([^={]*)', content, flags=re.MULTILINE | re.DOTALL) if m: block_content = m.group(1) # print block_content.strip() for remove in removings: if remove in block_content: print '=' * 80 print dt(), page.title print block_content.strip() old_content = get_wiki_page_content(page.title) new_content = re.sub( u'=== Смотреть также ===\n\s*%s\n' % remove.replace('*', r'\*').replace('[', r'\[').replace(']', r'\]'), u'', old_content) if old_content != new_content: desc = u'Удаление "Смотреть также" со списком имён' save_wiki_page(page.title, new_content, desc, wait=5) print 'saved' else: print 'not changed' else: print u'×××'
def save_report(self, desc):
    """Append the pending report to the ``<data page>/report`` wiki page.

    The current page text is fetched, ``self.report`` is added after a
    newline, the page is saved with *desc* as the edit summary, and the
    pending report is reset to an empty string.
    """
    report_title = u"%s/report" % self.wikt_data_page
    existing = get_wiki_page_content(report_title)
    updated = existing + "\n" + self.report
    save_wiki_page(report_title, updated, desc)
    self.report = ''
def section_action(self, page, lang, section_content):
    """Replace empty ``{{морфо|...}}`` templates with ``{{морфо-ru|}}``.

    Only Russian sections are processed.  A template counts as empty when
    nothing is left of its parameters after stripping the known parameter
    names and the pipe characters.  If the morphology block changed, the
    live page is fetched, the section is swapped and the page is saved.
    A missing or redirect page is reported by mail instead.
    """
    super(ReplaceOldEmptyMorphoRu, self).section_action(page, lang,
                                                        section_content)
    title = page.title
    if lang != 'ru':
        return

    found = re.search(
        u'(=== *Морфологические и синтаксические свойства *=== *(.*?)\n)=',
        section_content, re.UNICODE | re.DOTALL)
    if not found:
        return
    old_body = found.group(2)

    morpho_re = re.compile(
        u"""(\{\{
            (?P<title>морфо\s*)  # заголовок
            \|(?P<params>[^{}]+?(\{\{[^{}]+?\}\})?[^{}]+?)  # параметры
            \}\})""",
        flags=re.UNICODE + re.DOTALL + re.VERBOSE)

    new_body = old_body
    for part in morpho_re.findall(old_body):
        # part[0] is the whole template, part[2] its parameter string.
        leftover = re.sub(
            u'(префд|прист|суфф-?|корень|инф|соед|оконч-?|интер|суффд|частица|постфикс)\d*=',
            '', part[2])
        leftover = re.sub(u'\|', '', leftover).strip()
        if leftover:
            continue
        new_body = new_body.replace(part[0], u'{{морфо-ru|}}')

    if old_body == new_body:
        return

    new_section_content = section_content.replace(old_body, new_body)
    try:
        old_content = get_wiki_page_content(title)
    except NoPage:
        send_wiki_mail(subject="pywikibot.exceptions.NoPage: " + title,
                       message=title)
        return
    except IsRedirectPage:
        send_wiki_mail(
            subject="pywikibot.exceptions.IsRedirectPage: " + title,
            message=title)
        return

    new_content = old_content.replace(section_content, new_section_content)
    if new_content != old_content:
        desc = u"Замена пустого {{морфо}} на пустой {{морфо-ru}}"
        save_wiki_page(title, new_content, desc)
def get_page_content(self, title):
    """Return the wiki text of *title*, or None if it cannot be used.

    A missing page and a redirect page are each recorded in the report
    (colour ``maroon``) and yield None.
    """
    try:
        return get_wiki_page_content(title)
    except NoPage:
        problem = u'Страница "[[%s]]" не найдена'
    except IsRedirectPage:
        problem = u'Страница "[[%s]]" является редиректом'
    self.add_report(problem % title, 'maroon')
    return None
def get_words_list_from_wiki(page_title): try: exceptions_content = get_wiki_page_content(page_title) except pywikibot.NoPage: print u'No "wiki_list" page.' return [] words = list() for line in exceptions_content.split('\n'): m = re.match('^\* \[\[(.*)\]\]$', line) if not m: print u'ERROR on "wiki_list" page: "%s"' % line words.append(m.group(1)) return words
def lang_action(self, page, lang, content): content = page.content p = re.compile('\[\[[a-z]{2}:\]\]\n?') if p.search(content): print page.title content = get_wiki_page_content(page.title) new_content = p.sub('', content) if new_content != content: new_content, changed = wikify_headers_spaces(new_content) desc = u'Удаление пустышек interwiki' save_wiki_page(page.title, new_content, desc, wait=5) # self.stop() return content
def lang_action(self, page, lang, content): content = page.content if u'{{DEFAULTSORT:' in content: print page.title content = get_wiki_page_content(page.title) new_content = re.sub(u'^\{\{DEFAULTSORT:\w+\}\}\n', '', content, flags=re.UNICODE) if new_content != content: new_content, changed = wikify_headers_spaces(new_content) desc = u'Удаление {{DEFAULTSORT}}' # save_wiki_page(page.title, new_content, desc, wait=5) # self.stop() return content
def lang_action(self, page, lang, content): content = page.content if u'{{Cf|Индекс:Русский язык/Женские имена}}' in content: print page.title content = get_wiki_page_content(page.title) new_content = content.replace( u'==== Значение ====\nЖенское имя\n{{Cf|Индекс:Русский язык/Женские имена}}', u'==== Значение ====\n# Женское имя {{пример|}}') if new_content != content: new_content, changed = wikify_headers_spaces(new_content) desc = u'Удаление {{Cf|Индекс:Русский язык/Женские имена}} из раздела "Значение"' save_wiki_page(page.title, new_content, desc, wait=5) # self.stop() return content
def lang_action(self, page, lang, content): content = page.content p = re.compile('\[\[(?:\w{2,3}|zh-min-nan):(?:[^]]+)\]\]') parts = p.findall(content) for part in parts: if p.search(content): print page.title content = get_wiki_page_content(page.title) new_content = p.sub('', content) if new_content != content: new_content, changed = wikify_headers_spaces(new_content) desc = u'Удаление пустышек interwiki' save_wiki_page(page.title, new_content, desc, wait=5) # self.stop() return content
def check_ru_cats(): related = ['wikt_ru_verdict', 'wikt_ru_header'] items = LangCode.objects.prefetch_related(*related).order_by('code') for item in items: try: cat = item.wikt_ru_verdict.cat if not cat: continue title = u"Категория:%s" % cat except ObjectDoesNotExist: continue try: print title page = get_wiki_page_content(title) print 'ok' except NoPage: print 'NoPage', '#' * 100
def action(self, page, **kwargs): if page.title == u'々': return print '=' * 80 print dt(), page.title old_content = '' old_content = get_wiki_page_content(page.title) new_content = re.sub( u'\n ===Аналоги ===\s*\n', u'\n=== Синонимы ===\n', old_content ) if old_content != new_content: desc = u'Переименование "Аналоги" в "Синонимы"' save_wiki_page(page.title, new_content, desc, wait=5) # self.stop() else: print u'×××'
def action(self, page, **kwargs): super(RodstvBlockGluk, self).action(page, **kwargs) # print '=' * 80 if ':' in page.title: return old_content = page.content p = re.compile(u'\{\{родств-блок([^}]+)\}\}\n\s*\n\{\{родств-блок') if p.search(old_content): print dt(), page.title old_content = get_wiki_page_content(page.title) new_content = p.sub(u'{{родств-блок\\1}}\n{{родств-блок', old_content) if old_content != new_content: desc = u'Удаление лишней пустой строки между блоками "родств-блок"' # print new_content save_wiki_page(page.title, new_content, desc, wait=5) # self.stop() else: print u'×××'
def action(self, page, **kwargs): print '=' * 80 print dt(), page.title return old_content = get_wiki_page_content(page.title) # new_content = re.sub( # u'\n=== Фразеологизмы и устойчивые сочетания ===\n([^=]*)=== Загадки ===\n', # u'\n=== Фразеологизмы и устойчивые сочетания ===\n\\1==== Загадки ====\n', # old_content # ) # if old_content != new_content: # desc = u'Перемещение раздела "Загадки" в "Фразеологизмы и устойчивые словосочетания"' # save_wiki_page(page.title, new_content, desc, wait=5) # # self.stop() # else: # print u'×××' # return new_content = re.sub( u'\n=== Фразеологизмы и устойчивые сочетания ===\n([^=]*)==== Пословицы и поговорки ====\n([^=]*)=== Загадки ===\n', u'\n=== Фразеологизмы и устойчивые сочетания ===\n\\1==== Пословицы и поговорки ====\n\\2==== Загадки ====\n', old_content ) if old_content != new_content: desc = u'Перемещение раздела "Загадки" в "Фразеологизмы и устойчивые словосочетания"' save_wiki_page(page.title, new_content, desc, wait=5) # self.stop() else: print u'×××' return if u'=== Фразеологизмы и устойчивые сочетания ===' not in old_content: new_content = re.sub( u'\n=== Загадки ===\n', u'\n=== Фразеологизмы и устойчивые сочетания ===\n\n==== Загадки ====\n', old_content ) if old_content != new_content: desc = u'Перемещение раздела "Загадки" в создаваемый "Фразеологизмы и устойчивые словосочетания"' save_wiki_page(page.title, new_content, desc, wait=5) # self.stop() else: print u'×××'
def get_samples(): content = get_wiki_page_content(u'Участник:Vitalik/Шаблоны_словоизменений/' u'Существительные/Помощь_в_выборе/примеры') data = dict() for part in content.split('\n\n'): part = part.strip() if part.startswith('===='): m = re.search(u'==== \[\[Служебная:Ссылки сюда/Шаблон:сущ ru ([^|]+)|([^]]+)\]\] ====', part) title = m.group(1) # print title data[title] = dict() bases = re.findall(u': (основа\d*)=([^\n]*)\n', part) for key, value in bases: # print key, value data[title][key] = value m = re.search(u': примеры:(.*)', part) samples = m.group(1).strip() # print samples data[title][u'примеры'] = samples # print return data
def process_page(title_pattern, link): title = title_pattern % link print '-' * 40 print title print '-' * 40 try: lines = get_wiki_page_content(title).split('\n') except pywikibot.exceptions.NoPage: print '404', '?' * 100 return [] words = list() for line in lines: if line.startswith(u'см.'): print u'→', line # m = re.match(u'^см. \[\[(/[а-яё])\|([а-яё]{2})\]\]$', line) # m = re.match(u'^см. \[\[(../[а-яё]{2})\|([а-яё]{2})\]\]$', m = re.match(u'^см. \[\[../(?P<link>\w+)\|(?P<text>\w+)\]\]$', line, flags=re.IGNORECASE | re.UNICODE) if not m: print line, '#' * 100 continue sub_link = m.group('link') text = m.group('text') # print sub_link.lower(), text # print repr(sub_link.lower()) # print repr(text) if sub_link.lower() != text: print line, '%' * 100 continue words += process_page(title_pattern, sub_link) elif line.startswith('*'): # print u'·', line m = re.match(u'^\* *\[\[(?P<word>.*)\]\]$', line) if not m: print line, '$' * 100 continue words.append(m.group('word')) return words
print "=" * 100 print word print "-" * 100 print old_value print "-" * 100 print new_value print "-" * 100 print old_report = old_reports[word].get(u"Отчёт") new_report = new_reports[word].get(u"Отчёт") if old_report != new_report: print u"×" * 20, u"ERROR: Difference in reports" continue # make change old_word_content = get_wiki_page_content(word) new_word_content = old_word_content.replace(old_value, new_value) date = datetime.utcnow().strftime("%d.%m.%Y %H:%M") if old_word_content != new_word_content: desc = ( u"Применение изменений со страницы " u"[[Участник:Vitalik/Массовое редактирование/" u"Словоизменение/%s#%s|массового редактирования]]" % (title, word) ) save_wiki_page(word, new_word_content, desc, wait=5) report_append = u"* {{Done|%s}} — " u"''изменения успешно применены к статье \"%s\"''\n" % ( date, word, ) else: report_append = (
def get_content(self, title):
    """Return whatever get_wiki_page_content() yields for *title*."""
    wiki_text = get_wiki_page_content(title)
    return wiki_text
def run(self): if not self.wikt_data_page or not self.description: raise NotImplementedError() m = re.search(u':Cinemantique/(.+)', self.wikt_data_page) name = m.group(1) on_value = u'* [[%s|%s]] = on' % (self.wikt_data_page, name) c = get_wiki_page_content(u'Участник:Cinemantique/bot') if on_value not in c: print u'bot offline -> exit' return c = get_wiki_page_content(self.wikt_data_page).strip() if not c: return self.add_report(u'Бот запущен', 'silver') self.save_report('started') data = {} items = c.split('\n\n') for item in items: title, values = self.get_item(item, data) if values is None: continue data[title] = values i = 0 for title, values in sorted(data.items(), key=lambda x: x[0]): i += 1 print i # print title content = self.get_page_content(title) if content is None: continue parts = re.split('^= *\{\{-([-\w]+)-(?:\|[^}]*)?\}\} *=$', content, flags=re.MULTILINE) parts.pop(0) sections = [ {'lang': part[0], 'content': part[1]} for part in chunks(parts, 2) ] section_content = '' for section in sections: if section['lang'] == 'ru': section_content = section['content'] res = re.findall('\n== *[^=].*[^=] *==\n', section_content) # print len(res) # for r in res: # print r.encode('utf-8') if len(res) > 1: parts = re.split("^((?:==)(?:[^=].*?[^=])(?:==))$", section_content, flags=re.MULTILINE) sections2 = [ {'header2': '', 'content': parts.pop(0)} ] sections2 += [ {'header2': part[0], 'content': part[1]} for part in chunks(parts, 2) ] count_third = 0 for data in sections2: if '===' in data['content']: count_third += 1 if count_third > 1: self.add_report(u'В статье "[[%s]]" содержатся омонимы, пропускаем.' % title, 'maroon') section_content = None break if not section_content: if section_content == '': self.add_report(u'В статье "[[%s]]" не найдены русские заголовки, пропускаем.' 
% title, 'maroon') continue new_section_content = \ self.get_new_section_content(title, values, section_content) if new_section_content is None: continue new_content = content.replace(section_content, new_section_content) self.make_change(title, content, new_content) self.add_report(u'Статья "[[%s]]" успешно обновлена.' % title, 'green') self.add_report(u'Бот завершён', 'silver') self.save_report('finished') save_wiki_page(self.wikt_data_page, '', u'Удаление обработанного содержимого - [[%s/report|Отчёт]]' % self.wikt_data_page)
# coding: utf-8
from dictionaries.utils.file import save_file
from wikt.commons.utils.wikibot import get_wiki_page_content

# Scratch script: write a string containing Egyptian hieroglyphs to a file
# as UTF-8, then dump the wiki text of the page u'небо' to a second file.
s = u'|egy=[[𓊪𓏏𓇯]]'
encoded = s.encode('utf-8')
with open('test_utf.txt', mode='w') as f:
    f.write(encoded)

content = get_wiki_page_content(u'небо')
save_file(u'небо-t.txt', content, encode=u'utf-8')
def section_action(self, page, lang, section_content): super(ReplaceOldMorphoRu, self).section_action(page, lang, section_content) title = page.title if lang != 'ru': return m = re.search(u'(=== *Морфологические и синтаксические свойства *=== *(.*?)\n)=', section_content, re.UNICODE | re.DOTALL) if not m: return old_body = m.group(2) p = re.compile( u"""(\{\{ (?P<title>морфо\s*) # заголовок \|(?P<params>[^{}]+?(\{\{[^{}]+?\}\})?[^{}]+?) # параметры \}\})""", flags=re.UNICODE + re.DOTALL + re.VERBOSE) parts = p.findall(old_body) new_body = old_body # print title.encode('utf-8') for part in parts: old_params = part[2] is_title = old_params is_title = re.sub(u'(префд|прист|суфф-?|корень|инф|соед|оконч-?|интер|суффд|частица|постфикс|источник)\d*=', '', is_title) is_title = re.sub(u'\|', '', is_title).strip() if is_title == title or is_title == title + u'т' or is_title == title + u'к': # print # print title # print part[0] # {{морфо||адрес|ова|суфф2=нн|ый}} try: sres = get_sorted_list(get_old_list(old_params)) except ValueError as e: print title.encode('utf-8') # sent = False # try: # send_mail(subject="morpho exception in: " + title, # message=title + "\n\n" + repr(e), # from_email=settings.DEFAULT_FROM_EMAIL, # recipient_list=['*****@*****.**']) # sent = True # except: # pass # if sent: # return # else: # raise raise # for key, value in sres: # print "- %s = '%s'" % (key, value) res = get_new_list(sres) ress = '|'.join(res) # print ress new_body = new_body.replace(part[0], u'{{морфо-ru|%s}}' % ress) # params = part[2].strip().split('|') # for value in params: # value = value.strip() # if '=' in value: # name, value = value.split('=', 1) # if '-' in value: # append_dict_list(self.morpho_wrong_hyphens, title, part[0]) # break elif not is_title: new_body = new_body.replace(part[0], u'{{морфо-ru|}}') if old_body != new_body: # print '=' * 100 # print old_body # print '-' * 100 # print new_body # print '=' * 100 old_section_content = section_content new_section_content = 
section_content.replace(old_body, new_body) try: old_content = get_wiki_page_content(title) except NoPage: send_wiki_mail(subject="pywikibot.exceptions.NoPage: " + title, message=title) return except IsRedirectPage: send_wiki_mail( subject="pywikibot.exceptions.IsRedirectPage: " + title, message=title ) return new_content = old_content.replace(old_section_content, new_section_content) if new_content != old_content: desc = u"Замена {{морфо}} на {{морфо-ru}}" # self.n_step += 1 # if self.n_step >= 100: # self.stop() new_content = default_wikifier(new_content) save_wiki_page(title, new_content, desc) # todo: create special function for this: d = datetime.now().strftime("%Y-%m-%d %H:%M:%S") append_file(os.path.join(settings.FILES_PATH, 'reports', self.report_name), "%s: %s" % (d, title), encode='utf-8')
u'Шаблон:гл ru 6°b/cXСВ', u'Шаблон:гл ru 6a^-б-сяСВ', u'Шаблон:гл ru 6b-я-сяСВ', u'Шаблон:гл ru 6c-б-сяСВ', u'Шаблон:гл ru 6c-ёСВ', u'Шаблон:гл ru 6c-л-сяСВ', u'Шаблон:гл ru 6c-сяСВ', u'Шаблон:гл ru 6cXСВ', ] for title in titles: # print title # if title == u"Шаблон:гл ru 3aСВ": # skip = False # if skip: # continue old_content = get_wiki_page_content(title) new_content = old_content # old_content = article.get() # if "[" in old_content: # print u'→ "[" found!' # p = re.compile(u'(?P<source>(?P<prefix>\|[Прич|Деепр][А-Яа-я]* *= *)\[\[(?P<link>[^|!]+)(\||\{\{!\}\})(?P<text>[^]]+)\]\](, \[\[(?P<link2>[^|!]+)(\||\{\{!\}\})(?P<text2>[^]]+)\]\])?)', re.UNICODE) p = re.compile(u'(?P<source>(?P<prefix>\|[А-Яа-я]+ *= *)\[\[(?P<link>[^|!]+)(\||\{\{!\}\})(?P<text>[^]]+)\]\](, \[\[(?P<link2>[^|!]+)(\||\{\{!\}\})(?P<text2>[^]]+)\]\])?)', re.UNICODE) items = p.findall(old_content) if items: print print title count2 += 1 for item in items: count += 1 # print item # for m in item:
def lang_action(self, page, lang, content): if lang != 'ru': return content parts = re.findall(u'=== ?Морфологические и синтаксические свойства ?===\n(.*?)\n===', content, flags=re.DOTALL) if parts: for part in parts: part = part.strip() if not re.search(u'Наречие', part): continue # m = re.match(u"(?P<slogi>('''|<b>)([^'<]+)('''|</b>)\n\n)Наречие(, неизменяемое)?\.?(?P<sravn>( Сравнительная степень ?[-—:]|, сравн. форма) '''?[^']+'''?\.?)(?P<other>.*)", part) m = re.match(u"^(?P<slogi>('''|<b>)(?P<word>[^'<]*)('''|</b>)\s*\n\n)?Наречие(?P<or>(, (?P<or1>(вводн(ое|\.)|союзное) слово|союз|частица|предлог|числительное|предикатив))? или (?P<or2>(вводн(ое|\.)|союзное) слово|союз|частица|предлог|числительное|предикатив)|)([,;] неи[из]меняемое)?\.?(?P<sravn>( Ср(\.|авн(\.|ительная)) степень[\s ]?[-—:]?|, сравн. форма) ('''?|\[\[)?(?P<sravn_value>[^\n]+)('''?|\]\])?\.?\s*)?(?P<other>\s*\n\s*\n(\{\{морфо\s*\||приставка:).*)?$", part) if m: self.counter += 1 other = m.group('other') # print '=' * 100 # print '-' * 100 word = m.group('word') if word and '{' in word: continue if word: if '-' not in page.title:# and False: # print page.title, self.counter#, u'—', # print word word = word.replace('-', '|').replace(u'·', u'|·|') # print u"{{по-слогам|%s}}" % word # print pass else: word = word.replace(u'—', u'-') part1, part2 = word.split('-', 1) # print page.title # print word word = "%s-%s" % (part1, part2.replace('-', '|')) # print word # print else: word = page.title slogi = u"|слоги={{по-слогам|%s}}\n" % word or_value = m.group('or') or1 = m.group('or1') or2 = m.group('or2') if or_value: def get_value(or_val): if or_val == u'союзное слово': or_val = u'союз' if or_val == u'вводн. 
слово': or_val = u'вводное слово' plural = { u'предикатив': u'предикативы', u'частица': u'частицы', u'вводное слово': u'вводные слова', u'числительное': u'числительные', u'предлог': u'предлоги', u'союз': u'союзы', } return or_val, plural.get(or_val, '') # print page.title, self.counter#, u'—', or2, or2p = get_value(or2) or_params = u'|или=%s\n|или-кат=%s\n' % (or2, or2p) # print or2, or2p if or1: or1, or1p = get_value(or1) or_params += u'|или1=%s\n|или-кат1=%s\n' % \ (or1, or1p) # print or1, or1p # print else: or_params = u'|или=\n|или-кат=\n' sravn = m.group('sravn_value') if sravn: # print page.title, self.counter#, u'—', remove_end_sravn = ["''.", ']]', "'''", "''", ".'''", "]].", '.'] for end in remove_end_sravn: if sravn.endswith(end): sravn = sravn[:-len(end)] sravn = sravn.replace("'', ''", ", ").replace(';', ',') # print sravn # print else: sravn = '' sravn = u"|степень=%s\n" % sravn # print '-' * 100 # print part print '=' * 100 print page.title print '-' * 100 result = '{{adv ru\n' result += slogi result += u'|тип=\n' result += u'|класс=\n' result += sravn result += or_params result += '}}\n\n' result += other.strip() if other else u'{{морфо||||}}' print result print '-' * 100 print part print '=' * 100 print pass content = get_wiki_page_content(page.title) content, changed = wikify_headers_spaces(content) content = content.replace(part, result) desc = u'Использование шаблона "adv ru"' save_wiki_page(page.title, content, desc, wait=5) else: # print '=' * 100 # print page.title, u'×' * 50 # print '-' * 100 # print part # print '=' * 100 pass return content
def get_templates(self, page_title):
    """Return the names used in ``{{template|name}}`` calls on a page.

    A literal ``=`` is appended once more when ``=`` is among the names
    found.
    """
    page_text = get_wiki_page_content(page_title)
    names = re.findall(u"\{\{template\|([^|}]+)\}\}", page_text)
    if "=" not in names:
        return names
    names.append("=")
    return names
def load2(): prefix = u'Категория:Шаблоны словоизменений/' pages = Page.objects.prefetch_related('page_content').\ filter(title__startswith=prefix) for page in pages: # print '=' * 100 # print title = page.title suffix = title[len(prefix):] # print page.title # print suffix if not re.match('^[-a-z]+$', suffix): # print u'×' * 100 continue # new_cat_title = u'Категория:Шаблоны:Словоизменение/%s' % suffix # new_content = u'[[Категория:Шаблоны:Словоизменение/По языкам|%s]]\n' \ # u'[[Категория:Шаблоны/%s|Словоизменение]]\n' \ # % (suffix, suffix) # save_wiki_page(new_cat_title, new_content, # u"создание категорий") # print page.title print print suffix, get_edit_page_url(title) print suffix, get_edit_page_url(title.replace( u'Категория:Шаблоны словоизменений', u'Категория:Шаблоны:Словоизменение')) print '-' * 10 # print 'ok' content = page.content # print '-' * 50 # print content # continue for line in content.split('\n'): if not line.strip(): continue m = re.match(u'^\[\[Категория:Шаблоны словоизменений по языкам(\|([-a-z]+))?\]\]$', line) if m: print u'По языкам — ok' continue lang = m.group(2) if not lang: print print suffix print line print u'я →', lang old_content = get_wiki_page_content(title) new_content = old_content.replace( line, u'[[Категория:Шаблоны словоизменений по языкам|%s]]' % suffix) if old_content != new_content: desc = u'приведение описания категорий к единому стилю' save_wiki_page(title, new_content, desc, wait=5) break else: print u'×', title pass elif lang != suffix: # print # print suffix # print line # print u'я →', lang pass else: # print # print suffix # print line # print u'я →', lang pass continue m = re.match(u'^\[\[Категория:Шаблоны/([-a-z]+)(\|[*А-Яа-я ]+)?\]\]$', line) if m: print u'Шаблоны — ok' continue lang = m.group(1) sort = m.group(2) # if lang != suffix or sort != u'|*Ш': if lang != suffix: # print line # print u'ш →', lang, u'—' ,sort continue # else: # print line # print u'ш →', lang, u'—' ,sort old_content = 
get_wiki_page_content(title) new_content = old_content.replace( line, u'[[Категория:Шаблоны/%s|Словоизменение]]' % lang) if old_content != new_content: desc = u'приведение описания категорий к единому стилю' save_wiki_page(title, new_content, desc, wait=5) break else: print u'×', title continue m = re.match(u'^\[\[Категория:Викисловарь:([-А-Яа-я ()]+)(\|[*А-Яа-я]+)?\]\]$', line) if m: name = m.group(1) sort = m.group(2) print u'Категория — %s — ok' % name print '=' * 100 print line print '=' * 100 continue p = re.compile(u'^\[\[Категория:([-А-Яа-я ()]+)(\|[*А-Яа-я]+)?\]\]$') m = p.match(line) if m: name = m.group(1) sort = m.group(2) print u'Категория — %s — ×××?' % name # if u'[[Категория:Шаблоны/' not in content: # old_content = get_wiki_page_content(title) # new_content = re.sub(u'\[\[Категория:([-А-Яа-я ()]+)(\|[*А-Яа-я]+)?\]\]', # u'[[Категория:Шаблоны/%s|Словоизменение]]' % suffix, # old_content) # if old_content != new_content: # desc = u'замена основной категории языка на "Шаблоны/%s"' % suffix # save_wiki_page(title, new_content, desc, wait=5) # break # else: # print u'#' * 100, title # pass continue # print line if sort: if sort != u'|*Ш': # print # print suffix # print line # print u'к →', '...', u'—' , sort pass else: print print suffix print line print u'к →', name, u'—' , sort continue # if sort != u'|*Ш': # print line # print u'к →', '...', u'—' ,sort continue m = re.match(u'^\[\[[a-z]+:([^]]+)]\]$', line) if m: print 'lang', u'$' * 40 print '=' * 100 print line print '=' * 100 # print u'l →', m.group(1) continue if line == u'[[Категория:Шаблоны словоизменений/zh]]': print 'base' print '=' * 100 print line print '=' * 100 # print u'с →', 'ok' continue print line, '×' * 100 print '=' * 100 print line print '=' * 100
def get_templates(self, page_title):
    """Return the names used in ``{{template|name}}`` calls on a page."""
    template_call = u'\{\{template\|([^|}]+)\}\}'
    page_text = get_wiki_page_content(page_title)
    return re.findall(template_call, page_text)
def action(self, page, **kwargs):
    """Record pages whose ``{{по-слогам}}`` value disagrees with the title.

    Pages whose title contains ``:`` and pages without ``слоги=`` are
    ignored.  For every ``слоги=`` line starting with a ``{{по-слогам|...}}``
    (or ``{{по слогам|...}}``) template, the template argument is compared
    with the title through ``base_word``.  Mismatches are appended to
    ``self.content`` as one ``out(title, value)`` line each.  Nothing is
    saved to the wiki.

    The previous body also carried branches that only executed ``pass``
    and a page-rewriting tail placed after an unconditional ``return``.
    Both have been removed.
    NOTE(review): this assumes ``base_word`` has no side effects, since the
    removed no-op branch called it too — confirm.
    """
    super(SlogiReplacer, self).action(page, **kwargs)

    title = page.title
    if ':' in title:
        return
    content = page.content
    if u"слоги=" not in content:
        return

    p = re.compile(u"слоги=(?P<value>.*)\n")
    for value in p.findall(content):
        # Only values that start with "{{по-слогам|" are checked.
        m = re.match(u"^\{\{по[- ]слогам\|(?P<value2>[^}]*)\}\}(?P<tail>.*)$", value)
        if not m:
            continue
        value2 = m.group('value2')
        if value2 and base_word(value2) != base_word(title):
            self.content += out(title, value2) + '\n'