Python get_wiki_page_content 예제들, wikt.commons.utils.wikibot.get_wiki_page_content Python 예제들

예제 #1

0

파일 보기

파일: start_red_links.py 프로젝트: 2vitalik/words

def generate_red_links_index():
    """Rebuild the list of "red link" words and publish it.

    Candidate words come from ``load_from_dictionaries()`` and from two
    auxiliary wiki pages.  Words already present in the local
    ``ru+redirects.txt`` index and words listed on the "ignored words" wiki
    page are subtracted.  The sorted result is written to the
    ``red_links_ru`` report file and passed to ``create_index``.

    Wiki lines that are not of the form ``* [[word]]`` (``*`` or ``#`` for
    the auxiliary pages) are reported on stdout and skipped.
    """
    ignore_words_content = get_wiki_page_content(u'Участник:Vitalik/Индекс/Красные ссылки/Игнорируемые слова')
    ignore_words = list()
    for line in ignore_words_content.split('\n'):
        m = re.match('^\* \[\[(.*)\]\]$', line)
        if not m:
            print u'ERROR in ignore_words: %s' % line
            # A malformed (e.g. empty) line carries no word; without this
            # the m.group() call below fails with AttributeError on None.
            continue
        ignore_words.append(m.group(1).encode('utf8'))

    page_names = [
        u'Участник:Vitalik/Индекс/Красные ссылки/Дополнительные источники/Cinematique',
        u'Участник:Vitalik/Индекс/Красные ссылки/Дополнительные источники/Cinematique/Недостающие глаголы из причастий',
    ]
    cin_words = list()
    for page_name in page_names:
        cin_words_content = get_wiki_page_content(page_name)
        for line in cin_words_content.split('\n'):
            m = re.match('^[*#] \[\[(.*)\]\]$', line)
            if not m:
                print u'ERROR in cin_words: %s' % line
                # Same guard as above: report the line and move on.
                continue
            cin_words.append(m.group(1).encode('utf8'))

    index_words = load_lines(join(settings.DATA_PATH, 'wikt_words',
                                  'ru+redirects.txt'))
    dict_words = load_from_dictionaries()
    # Red words = (dictionaries + auxiliary pages) - existing index - ignored.
    red_words = list((set(dict_words) | set(cin_words)) - set(index_words) - set(ignore_words))
    print "Red count: ", len(red_words)

    save_lines(join(settings.FILES_PATH, 'reports', 'red_links_ru'),
               sorted(red_words))

    create_index(red_words, u'Индекс/Красные ссылки',
                 desc=u'Обновление списка красных ссылок',
                 push=True, debug=False,
                 header=u'Красные ссылки',
    )

예제 #2

0

파일 보기

파일: language.py 프로젝트: 2vitalik/words

 def action(self, page, **kwargs):
     content = kwargs['content']
     parts = re.findall('(^|\n)(=[^=\n]+=)\n', content)
     for part in parts:
         found = part[1]
         if found in [u'= Буква (латиница) =', u'= Буква (кириллица) =']:
             continue
         # print "* [[%s]]: <code><nowiki>%s</nowiki></code>" % (page.title, found)
         m = re.match(u'^= *\{\{-(?P<lang>[-a-z]+|Праславянский)-(?P<remove>\|([^}]+|\{\{PAGENAME\}\}|))?\}\} *=$',
                      found, re.IGNORECASE)
         if not m:
             print found
         if m:
             remove = m.group('remove')
             # if remove:
             #     print page.title, remove
             if remove == '|nocat':
                 continue
             lang = m.group('lang')
             # print "* %s: %s" % (page.title, lang)
             if lang != 'ru':
                 continue
             old_header = m.group(0)
             new_header = "= {{-%s-}} =" % lang
             if old_header == new_header:
                 continue
             self.changed += 1
             print dt(), 'changed:', self.changed
             wiki_content = get_wiki_page_content(page.title)
             new_wiki_content = wiki_content.replace(old_header, new_header)
             save_wiki_page(page.title, new_wiki_content,
                            "викификация заголовка первого уровня",
                            wait=5)

예제 #3

0

파일 보기

파일: remove_bad.py 프로젝트: 2vitalik/words

 def action(self, page, **kwargs):
     """Report pages whose text contains a raw ``{{{`` sequence.

     After delegating to the parent ``action``, pages with ``:`` in the
     title are skipped.  For the remaining pages that contain ``{{{`` the
     title and an edit URL (with ``&section=2`` appended) are printed.

     NOTE: the unconditional ``return`` below makes everything after it
     unreachable, so in its current state this method never modifies a page.
     """
     # print page.title
     super(RemoveBad, self).action(page, **kwargs)
     title = page.title
     if ':' in title:
         return
     content = page.content
     if u"{{{" in content:
         print title
         print "%s&section=2" % get_edit_page_url(title)
     # Everything below this return is dead code (parked rewriting logic).
     return
     # if u"основа1={{{" in content:
     if u"основа={{{" in content:
         print title
         print "%s&section=2" % get_edit_page_url(title)
         try:
             # The first assignment is immediately overwritten by the fetch.
             old_content = content
             old_content = get_wiki_page_content(title)
         except IsRedirectPage:
             return
         # new_content = re.sub(u"\|основа\d?=\{\{\{\d\|?\}\}\}(́?\{\{\{\d\|?\}\}\})?\n", '', old_content)
         # Replace "{{{N|value}}}" placeholders by their default value.
         new_content = re.sub(u"\|основа=\{\{\{\d\|([А-Яа-я]+)\}\}\}\n",
                              u'|основа=\\1\n',
                              old_content)
         new_content = re.sub(u"\|слоги=\{\{\{\d\|([А-Яа-я]+)\}\}\}\n",
                              u'|слоги={{по-слогам|\\1}}\n',
                              new_content)
         if old_content == new_content:  # content doesn't change
             return
         desc = u'Удаление "странных" параметров'
         save_wiki_page(title, new_content, desc, wait=5)

예제 #4

0

파일 보기

파일: inflection.py 프로젝트: 2vitalik/words

 def tpl_action(self, page, old_tpl, title, morph, lang, params, tail,
                pre_tail):
     new_tpl = self.make_changes(page, old_tpl, title, morph, lang, params,
                                 tail, pre_tail)
     if new_tpl == old_tpl:  # tpl doesn't change
         return
     print 'changing'
     # return
     try:
         if self.debug:
             old_content = page.content
         else:
             old_content = get_wiki_page_content(page.title)
     except IsRedirectPage:
         return
     new_content = old_content.replace(old_tpl, new_tpl)
     if old_content == new_content:  # content doesn't change
         print "It's strange: content doesn't change in '%s'!" % \
             page.title.encode('cp1251')
         return
     if self.debug:
         print '-' * 80
         print new_content
     else:
         save_wiki_page(page.title, new_content, self.desc, wait=5)

예제 #5

0

파일 보기

파일: remove_header_sm_takzhe.py 프로젝트: 2vitalik/words

 def action(self, page, **kwargs):
     # print '=' * 80
     # print dt(), page.title
     content = kwargs['content']
     m = re.search(u'=== Смотреть также ===\n([^={]*)', content, flags=re.MULTILINE | re.DOTALL)
     if m:
         block_content = m.group(1)
         # print block_content.strip()
         for remove in removings:
             if remove in block_content:
                 print '=' * 80
                 print dt(), page.title
                 print block_content.strip()
                 old_content = get_wiki_page_content(page.title)
                 new_content = re.sub(
                     u'=== Смотреть также ===\n\s*%s\n' % remove.replace('*', r'\*').replace('[', r'\[').replace(']', r'\]'),
                     u'', old_content)
                 if old_content != new_content:
                     desc = u'Удаление "Смотреть также" со списком имён'
                     save_wiki_page(page.title, new_content, desc, wait=5)
                     print 'saved'
                 else:
                     print 'not changed'
     else:
         print u'×××'

예제 #6

0

파일 보기

파일: _base.py 프로젝트: 2vitalik/words

 def save_report(self, desc):
     """Append the pending report text to the ``<data page>/report`` page.

     The current text of the report page is fetched, ``self.report`` is
     added after a newline, the page is saved with ``desc`` as the edit
     description, and ``self.report`` is reset to an empty string.
     """
     report_title = u"%s/report" % self.wikt_data_page
     existing_text = get_wiki_page_content(report_title)
     combined_text = existing_text + "\n" + self.report
     save_wiki_page(report_title, combined_text, desc)
     # The buffered report has been flushed to the wiki.
     self.report = ''

예제 #7

0

파일 보기

파일: replace_old_empty_morpho.py 프로젝트: 2vitalik/words

 def section_action(self, page, lang, section_content):
     """Replace parameterless old morphology templates in a ``ru`` section.

     Inside the morphology sub-section every old-style template whose
     parameters are empty (once the known parameter names and the pipes are
     stripped) is replaced by the empty ``-ru`` variant.  If that changes
     the section, the current page text is fetched, the section is swapped
     in and the page is saved.  A missing or redirect page is reported by
     mail and left untouched.
     """
     super(ReplaceOldEmptyMorphoRu, self).section_action(page, lang, section_content)
     title = page.title
     if lang != 'ru':
         return
     header_match = re.search(
         u'(=== *Морфологические и синтаксические свойства *=== *(.*?)\n)=',
         section_content, re.UNICODE | re.DOTALL)
     if not header_match:
         return
     old_body = header_match.group(2)
     morpho_re = re.compile(
         u"""(\{\{
             (?P<title>морфо\s*)  # заголовок
             \|(?P<params>[^{}]+?(\{\{[^{}]+?\}\})?[^{}]+?)  # параметры
             \}\})""",
         flags=re.UNICODE | re.DOTALL | re.VERBOSE)
     new_body = old_body
     # Each hit is (whole template, template name, parameters, nested tpl).
     for whole_tpl, _name, raw_params, _nested in morpho_re.findall(old_body):
         leftover = re.sub(u'(префд|прист|суфф-?|корень|инф|соед|оконч-?|интер|суффд|частица|постфикс)\d*=', '', raw_params)
         leftover = re.sub(u'\|', '', leftover).strip()
         if leftover:
             # The template carries real values: keep it.
             continue
         new_body = new_body.replace(whole_tpl, u'{{морфо-ru|}}')
     if old_body == new_body:
         return
     new_section_content = section_content.replace(old_body, new_body)
     try:
         old_content = get_wiki_page_content(title)
     except NoPage:
         send_wiki_mail(subject="pywikibot.exceptions.NoPage: " + title,
                        message=title)
         return
     except IsRedirectPage:
         send_wiki_mail(
             subject="pywikibot.exceptions.IsRedirectPage: " + title,
             message=title)
         return
     new_content = old_content.replace(section_content, new_section_content)
     if new_content != old_content:
         save_wiki_page(title, new_content,
                        u"Замена пустого {{морфо}} на пустой {{морфо-ru}}")

예제 #8

0

파일 보기

파일: _base.py 프로젝트: 2vitalik/words

 def get_page_content(self, title):
     """Return the wiki text of ``title``, or ``None`` if it is unavailable.

     A missing page and a redirect page are each recorded through
     ``add_report`` (colour ``'maroon'``) before ``None`` is returned.
     """
     try:
         return get_wiki_page_content(title)
     except NoPage:
         problem = u'Страница "[[%s]]" не найдена'
     except IsRedirectPage:
         problem = u'Страница "[[%s]]" является редиректом'
     # Reached only when one of the two exceptions above was caught.
     self.add_report(problem % title, 'maroon')
     return None

예제 #9

0

파일 보기

파일: wiki.py 프로젝트: 2vitalik/words

def get_words_list_from_wiki(page_title):
    try:
        exceptions_content = get_wiki_page_content(page_title)
    except pywikibot.NoPage:
        print u'No "wiki_list" page.'
        return []
    words = list()
    for line in exceptions_content.split('\n'):
        m = re.match('^\* \[\[(.*)\]\]$', line)
        if not m:
            print u'ERROR on "wiki_list" page: "%s"' % line
        words.append(m.group(1))
    return words

예제 #10

0

파일 보기

파일: just_replace.py 프로젝트: 2vitalik/words

 def lang_action(self, page, lang, content):
     content = page.content
     p = re.compile('\[\[[a-z]{2}:\]\]\n?')
     if p.search(content):
         print page.title
         content = get_wiki_page_content(page.title)
         new_content = p.sub('', content)
         if new_content != content:
             new_content, changed = wikify_headers_spaces(new_content)
             desc = u'Удаление пустышек interwiki'
             save_wiki_page(page.title, new_content, desc, wait=5)
             # self.stop()
     return content

예제 #11

0

파일 보기

파일: just_replace.py 프로젝트: 2vitalik/words

 def lang_action(self, page, lang, content):
     content = page.content
     if u'{{DEFAULTSORT:' in content:
         print page.title
         content = get_wiki_page_content(page.title)
         new_content = re.sub(u'^\{\{DEFAULTSORT:\w+\}\}\n', '', content,
                              flags=re.UNICODE)
         if new_content != content:
             new_content, changed = wikify_headers_spaces(new_content)
             desc = u'Удаление {{DEFAULTSORT}}'
             # save_wiki_page(page.title, new_content, desc, wait=5)
             # self.stop()
     return content

예제 #12

0

파일 보기

파일: just_replace.py 프로젝트: 2vitalik/words

 def lang_action(self, page, lang, content):
     content = page.content
     if u'{{Cf|Индекс:Русский язык/Женские имена}}' in content:
         print page.title
         content = get_wiki_page_content(page.title)
         new_content = content.replace(
             u'==== Значение ====\nЖенское имя\n{{Cf|Индекс:Русский язык/Женские имена}}',
             u'==== Значение ====\n# Женское имя {{пример|}}')
         if new_content != content:
             new_content, changed = wikify_headers_spaces(new_content)
             desc = u'Удаление {{Cf|Индекс:Русский язык/Женские имена}} из раздела "Значение"'
             save_wiki_page(page.title, new_content, desc, wait=5)
             # self.stop()
     return content

예제 #13

0

파일 보기

파일: just_replace.py 프로젝트: 2vitalik/words

 def lang_action(self, page, lang, content):
     content = page.content
     p = re.compile('\[\[(?:\w{2,3}|zh-min-nan):(?:[^]]+)\]\]')
     parts = p.findall(content)
     for part in parts:
         if p.search(content):
             print page.title
             content = get_wiki_page_content(page.title)
             new_content = p.sub('', content)
             if new_content != content:
                 new_content, changed = wikify_headers_spaces(new_content)
                 desc = u'Удаление пустышек interwiki'
                 save_wiki_page(page.title, new_content, desc, wait=5)
                 # self.stop()
     return content

예제 #14

0

파일 보기

파일: gen_wiki.py 프로젝트: 2vitalik/words

def check_ru_cats():
    related = ['wikt_ru_verdict', 'wikt_ru_header']
    items = LangCode.objects.prefetch_related(*related).order_by('code')
    for item in items:
        try:
            cat = item.wikt_ru_verdict.cat
            if not cat:
                continue
            title = u"Категория:%s" % cat
        except ObjectDoesNotExist:
            continue
        try:
            print title
            page = get_wiki_page_content(title)
            print 'ok'
        except NoPage:
            print 'NoPage', '#' * 100

예제 #15

0

파일 보기

파일: analogi.py 프로젝트: 2vitalik/words

 def action(self, page, **kwargs):
     if page.title == u'々':
         return
     print '=' * 80
     print dt(), page.title
     old_content = ''
     old_content = get_wiki_page_content(page.title)
     new_content = re.sub(
         u'\n ===Аналоги ===\s*\n',
         u'\n=== Синонимы ===\n',
         old_content
     )
     if old_content != new_content:
         desc = u'Переименование "Аналоги" в "Синонимы"'
         save_wiki_page(page.title, new_content, desc, wait=5)
         # self.stop()
     else:
         print u'×××'

예제 #16

0

파일 보기

파일: rodstv_blok_gluk.py 프로젝트: 2vitalik/words

 def action(self, page, **kwargs):
     super(RodstvBlockGluk, self).action(page, **kwargs)
     # print '=' * 80
     if ':' in page.title:
         return
     old_content = page.content
     p = re.compile(u'\{\{родств-блок([^}]+)\}\}\n\s*\n\{\{родств-блок')
     if p.search(old_content):
         print dt(), page.title
         old_content = get_wiki_page_content(page.title)
         new_content = p.sub(u'{{родств-блок\\1}}\n{{родств-блок',
                             old_content)
         if old_content != new_content:
             desc = u'Удаление лишней пустой строки между блоками "родств-блок"'
             # print new_content
             save_wiki_page(page.title, new_content, desc, wait=5)
             # self.stop()
         else:
             print u'×××'

예제 #17

0

파일 보기

파일: poslovitsy.py 프로젝트: 2vitalik/words

 def action(self, page, **kwargs):
     """Print a separator and the page title; the rewriting is disabled.

     NOTE: the unconditional ``return`` right after the two prints makes
     everything below it unreachable.  The dead code holds the logic that
     moves the "riddles" section under the "phraseology" section.
     """
     print '=' * 80
     print dt(), page.title
     # Everything below this return is dead code (parked rewriting logic).
     return
     old_content = get_wiki_page_content(page.title)
     # new_content = re.sub(
     #     u'\n=== Фразеологизмы и устойчивые сочетания ===\n([^=]*)=== Загадки ===\n',
     #     u'\n=== Фразеологизмы и устойчивые сочетания ===\n\\1==== Загадки ====\n',
     #     old_content
     # )
     # if old_content != new_content:
     #     desc = u'Перемещение раздела "Загадки" в "Фразеологизмы и устойчивые словосочетания"'
     #     save_wiki_page(page.title, new_content, desc, wait=5)
     #     # self.stop()
     # else:
     #     print u'×××'
     # return
     # Variant: demote the riddles header when it follows the proverbs
     # sub-section of the phraseology section.
     new_content = re.sub(
         u'\n=== Фразеологизмы и устойчивые сочетания ===\n([^=]*)==== Пословицы и поговорки ====\n([^=]*)=== Загадки ===\n',
         u'\n=== Фразеологизмы и устойчивые сочетания ===\n\\1==== Пословицы и поговорки ====\n\\2==== Загадки ====\n',
         old_content
     )
     if old_content != new_content:
         desc = u'Перемещение раздела "Загадки" в "Фразеологизмы и устойчивые словосочетания"'
         save_wiki_page(page.title, new_content, desc, wait=5)
         # self.stop()
     else:
         print u'×××'
     # A second return: the variant below is unreachable even if the first
     # return is removed.
     return
     # Variant: create the phraseology section when the page has none.
     if u'=== Фразеологизмы и устойчивые сочетания ===' not in old_content:
         new_content = re.sub(
             u'\n=== Загадки ===\n',
             u'\n=== Фразеологизмы и устойчивые сочетания ===\n\n==== Загадки ====\n',
             old_content
         )
         if old_content != new_content:
             desc = u'Перемещение раздела "Загадки" в создаваемый "Фразеологизмы и устойчивые словосочетания"'
             save_wiki_page(page.title, new_content, desc, wait=5)
             # self.stop()
         else:
             print u'×××'

예제 #18

0

파일 보기

파일: f.py 프로젝트: 2vitalik/words

def get_samples():
    """Load the noun-inflection template samples from the wiki help page.

    The page text is split on blank lines.  Every part that starts with a
    fourth-level header linking to a noun template contributes one entry.
    Parts with any other header are ignored.

    Returns:
        dict: template name -> dict holding the ``основа*`` values found in
        the part plus the key ``примеры`` with the samples text (an empty
        string when the part has no samples line).
    """
    content = get_wiki_page_content(u'Участник:Vitalik/Шаблоны_словоизменений/'
                                    u'Существительные/Помощь_в_выборе/примеры')
    data = dict()
    for part in content.split('\n\n'):
        part = part.strip()
        if not part.startswith('===='):
            continue
        # The "|" between link target and link text must be escaped:
        # unescaped it splits the whole pattern into two alternatives.
        m = re.search(u'==== \[\[Служебная:Ссылки сюда/Шаблон:сущ ru ([^|]+)\|([^]]+)\]\] ====', part)
        if not m:
            # Not a template header: nothing to record for this part.
            continue
        title = m.group(1)
        # Later duplicates of a key overwrite earlier ones, as before.
        entry = dict(re.findall(u': (основа\d*)=([^\n]*)\n', part))
        m = re.search(u': примеры:(.*)', part)
        entry[u'примеры'] = m.group(1).strip() if m else u''
        data[title] = entry
    return data

예제 #19

0

파일 보기

파일: index_bg.py 프로젝트: 2vitalik/words

def process_page(title_pattern, link):
    title = title_pattern % link
    print '-' * 40
    print title
    print '-' * 40
    try:
        lines = get_wiki_page_content(title).split('\n')
    except pywikibot.exceptions.NoPage:
        print '404', '?' * 100
        return []
    words = list()
    for line in lines:
        if line.startswith(u'см.'):
            print u'→', line
            # m = re.match(u'^см. \[\[(/[а-яё])\|([а-яё]{2})\]\]$', line)
            # m = re.match(u'^см. \[\[(../[а-яё]{2})\|([а-яё]{2})\]\]$',
            m = re.match(u'^см. \[\[../(?P<link>\w+)\|(?P<text>\w+)\]\]$',
                         line, flags=re.IGNORECASE | re.UNICODE)
            if not m:
                print line, '#' * 100
                continue
            sub_link = m.group('link')
            text = m.group('text')
            # print sub_link.lower(), text
            # print repr(sub_link.lower())
            # print repr(text)
            if sub_link.lower() != text:
                print line, '%' * 100
                continue
            words += process_page(title_pattern, sub_link)
        elif line.startswith('*'):
            # print u'·', line
            m = re.match(u'^\* *\[\[(?P<word>.*)\]\]$', line)
            if not m:
                print line, '$' * 100
                continue
            words.append(m.group('word'))
    return words

예제 #20

0

파일 보기

파일: recent_mass.py 프로젝트: 2vitalik/words

                print "=" * 100
                print word
                print "-" * 100
                print old_value
                print "-" * 100
                print new_value
                print "-" * 100
                print
                old_report = old_reports[word].get(u"Отчёт")
                new_report = new_reports[word].get(u"Отчёт")
                if old_report != new_report:
                    print u"×" * 20, u"ERROR: Difference in reports"
                    continue

                # make change
                old_word_content = get_wiki_page_content(word)
                new_word_content = old_word_content.replace(old_value, new_value)
                date = datetime.utcnow().strftime("%d.%m.%Y %H:%M")
                if old_word_content != new_word_content:
                    desc = (
                        u"Применение изменений со страницы "
                        u"[[Участник:Vitalik/Массовое редактирование/"
                        u"Словоизменение/%s#%s|массового редактирования]]" % (title, word)
                    )
                    save_wiki_page(word, new_word_content, desc, wait=5)
                    report_append = u"* {{Done|%s}} — " u"''изменения успешно применены к статье \"%s\"''\n" % (
                        date,
                        word,
                    )
                else:
                    report_append = (

예제 #21

0

파일 보기

파일: base_changer.py 프로젝트: 2vitalik/words

 def get_content(self, title):
     """Return the wiki text of ``title`` via ``get_wiki_page_content``."""
     page_text = get_wiki_page_content(title)
     return page_text

예제 #22

0

파일 보기

파일: _base.py 프로젝트: 2vitalik/words

    def run(self):
        if not self.wikt_data_page or not self.description:
            raise NotImplementedError()
        m = re.search(u':Cinemantique/(.+)', self.wikt_data_page)
        name = m.group(1)
        on_value = u'* [[%s|%s]] = on' % (self.wikt_data_page, name)
        c = get_wiki_page_content(u'Участник:Cinemantique/bot')
        if on_value not in c:
            print u'bot offline -> exit'
            return
        c = get_wiki_page_content(self.wikt_data_page).strip()
        if not c:
            return
        self.add_report(u'Бот запущен', 'silver')
        self.save_report('started')

        data = {}
        items = c.split('\n\n')
        for item in items:
            title, values = self.get_item(item, data)
            if values is None:
                continue
            data[title] = values

        i = 0
        for title, values in sorted(data.items(), key=lambda x: x[0]):
            i += 1
            print i
            # print title
            content = self.get_page_content(title)
            if content is None:
                continue
            parts = re.split('^= *\{\{-([-\w]+)-(?:\|[^}]*)?\}\} *=$', content,
                             flags=re.MULTILINE)
            parts.pop(0)
            sections = [
                {'lang': part[0], 'content': part[1]}
                for part in chunks(parts, 2)
            ]
            section_content = ''
            for section in sections:
                if section['lang'] == 'ru':
                    section_content = section['content']
                    res = re.findall('\n== *[^=].*[^=] *==\n', section_content)
                    # print len(res)
                    # for r in res:
                    #     print r.encode('utf-8')
                    if len(res) > 1:
                        parts = re.split("^((?:==)(?:[^=].*?[^=])(?:==))$",
                                         section_content, flags=re.MULTILINE)
                        sections2 = [
                            {'header2': '', 'content': parts.pop(0)}
                        ]
                        sections2 += [
                            {'header2': part[0], 'content': part[1]}
                            for part in chunks(parts, 2)
                        ]
                        count_third = 0
                        for data in sections2:
                            if '===' in data['content']:
                                count_third += 1
                        if count_third > 1:
                            self.add_report(u'В статье "[[%s]]" содержатся омонимы, пропускаем.' % title, 'maroon')
                            section_content = None
                            break
            if not section_content:
                if section_content == '':
                    self.add_report(u'В статье "[[%s]]" не найдены русские заголовки, пропускаем.' % title, 'maroon')
                continue

            new_section_content = \
                self.get_new_section_content(title, values, section_content)
            if new_section_content is None:
                continue
            new_content = content.replace(section_content, new_section_content)
            self.make_change(title, content, new_content)
            self.add_report(u'Статья "[[%s]]" успешно обновлена.' % title, 'green')

        self.add_report(u'Бот завершён', 'silver')
        self.save_report('finished')
        save_wiki_page(self.wikt_data_page, '', u'Удаление обработанного содержимого - [[%s/report|Отчёт]]' % self.wikt_data_page)

예제 #23

0

파일 보기

파일: utf8mb4_to_file.py 프로젝트: 2vitalik/words

# coding: utf-8
from dictionaries.utils.file import save_file
from wikt.commons.utils.wikibot import get_wiki_page_content

# Sample wiki markup with characters outside the Basic Multilingual Plane.
s = u'|egy=[[𓊪𓏏𓇯]]'

# The text is encoded by hand, so the file is opened in binary mode: the
# bytes are written verbatim, without any text-mode translation.
with open('test_utf.txt', mode='wb') as f:
    f.write(s.encode('utf-8'))


# Round-trip a real article through the project's file helper.
content = get_wiki_page_content(u'небо')
save_file(u'небо-t.txt', content, encode=u'utf-8')

예제 #24

0

파일 보기

파일: fix_morpho.py 프로젝트: 2vitalik/words

    def section_action(self, page, lang, section_content):
        super(ReplaceOldMorphoRu, self).section_action(page, lang, section_content)
        title = page.title
        if lang != 'ru':
            return
        m = re.search(u'(=== *Морфологические и синтаксические свойства *=== *(.*?)\n)=',
                      section_content, re.UNICODE | re.DOTALL)
        if not m:
            return
        old_body = m.group(2)
        p = re.compile(
            u"""(\{\{
                (?P<title>морфо\s*)  # заголовок
                \|(?P<params>[^{}]+?(\{\{[^{}]+?\}\})?[^{}]+?)  # параметры
                \}\})""",
            flags=re.UNICODE + re.DOTALL + re.VERBOSE)
        parts = p.findall(old_body)
        new_body = old_body
        # print title.encode('utf-8')
        for part in parts:
            old_params = part[2]
            is_title = old_params
            is_title = re.sub(u'(префд|прист|суфф-?|корень|инф|соед|оконч-?|интер|суффд|частица|постфикс|источник)\d*=', '', is_title)
            is_title = re.sub(u'\|', '', is_title).strip()
            if is_title == title or is_title == title + u'т' or is_title == title + u'к':
                # print
                # print title
                # print part[0]
                # {{морфо||адрес|ова|суфф2=нн|ый}}
                try:
                    sres = get_sorted_list(get_old_list(old_params))
                except ValueError as e:
                    print title.encode('utf-8')
                    # sent = False
                    # try:
                    #     send_mail(subject="morpho exception in: " + title,
                    #                       message=title + "\n\n" + repr(e),
                    #                       from_email=settings.DEFAULT_FROM_EMAIL,
                    #                       recipient_list=['*****@*****.**'])
                    #     sent = True
                    # except:
                    #     pass
                    # if sent:
                    #     return
                    # else:
                    #     raise
                    raise

                # for key, value in sres:
                #     print "- %s = '%s'" % (key, value)
                res = get_new_list(sres)
                ress = '|'.join(res)
                # print ress
                new_body = new_body.replace(part[0], u'{{морфо-ru|%s}}' % ress)
                # params = part[2].strip().split('|')
                # for value in params:
                #     value = value.strip()
                #     if '=' in value:
                #         name, value = value.split('=', 1)
                #     if '-' in value:
                #         append_dict_list(self.morpho_wrong_hyphens, title, part[0])
                #         break
            elif not is_title:
                new_body = new_body.replace(part[0], u'{{морфо-ru|}}')
        if old_body != new_body:
            # print '=' * 100
            # print old_body
            # print '-' * 100
            # print new_body
            # print '=' * 100
            old_section_content = section_content
            new_section_content = section_content.replace(old_body, new_body)
            try:
                old_content = get_wiki_page_content(title)
            except NoPage:
                send_wiki_mail(subject="pywikibot.exceptions.NoPage: " + title,
                               message=title)
                return
            except IsRedirectPage:
                send_wiki_mail(
                    subject="pywikibot.exceptions.IsRedirectPage: " + title,
                    message=title
                )
                return
            new_content = old_content.replace(old_section_content, new_section_content)
            if new_content != old_content:
                desc = u"Замена {{морфо}} на {{морфо-ru}}"
                # self.n_step += 1
                # if self.n_step >= 100:
                #     self.stop()
                new_content = default_wikifier(new_content)
                save_wiki_page(title, new_content, desc)

                # todo: create special function for this:
                d = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                append_file(os.path.join(settings.FILES_PATH, 'reports',
                                         self.report_name),
                            "%s: %s" % (d, title),
                            encode='utf-8')

예제 #25

0

파일 보기

파일: for_soshial_2.py 프로젝트: 2vitalik/words

	u'Шаблон:гл ru 6°b/cXСВ',
	u'Шаблон:гл ru 6a^-б-сяСВ',
	u'Шаблон:гл ru 6b-я-сяСВ',
	u'Шаблон:гл ru 6c-б-сяСВ',
	u'Шаблон:гл ru 6c-ёСВ',
	u'Шаблон:гл ru 6c-л-сяСВ',
	u'Шаблон:гл ru 6c-сяСВ',
	u'Шаблон:гл ru 6cXСВ',
]
for title in titles:
        # print title
        # if title == u"Шаблон:гл ru 3aСВ":
        #     skip = False
        # if skip:
        #     continue
        old_content = get_wiki_page_content(title)
        new_content = old_content
        # old_content = article.get()
        # if "[" in old_content:
        #     print u'→ "[" found!'
        # p = re.compile(u'(?P<source>(?P<prefix>\|[Прич|Деепр][А-Яа-я]* *= *)\[\[(?P<link>[^|!]+)(\||\{\{!\}\})(?P<text>[^]]+)\]\](, \[\[(?P<link2>[^|!]+)(\||\{\{!\}\})(?P<text2>[^]]+)\]\])?)', re.UNICODE)
        p = re.compile(u'(?P<source>(?P<prefix>\|[А-Яа-я]+ *= *)\[\[(?P<link>[^|!]+)(\||\{\{!\}\})(?P<text>[^]]+)\]\](, \[\[(?P<link2>[^|!]+)(\||\{\{!\}\})(?P<text2>[^]]+)\]\])?)', re.UNICODE)
        items = p.findall(old_content)
        if items:
            print
            print title
            count2 += 1
        for item in items:
            count += 1
            # print item
            # for m in item:

예제 #26

0

파일 보기

파일: adv_ru_soshial.py 프로젝트: 2vitalik/words

 def lang_action(self, page, lang, content):
     """Rewrite free-text adverb descriptions of a Russian entry as ``{{adv ru}}``.

     Scans ``content`` for the bodies of the "morphological and syntactic
     properties" sections (matched by their Russian level-3 header).  Each
     body that mentions the Russian word for "adverb" and matches the
     free-text pattern below is turned into an ``{{adv ru ...}}`` template
     call.  For every converted body the page text is fetched again with
     ``get_wiki_page_content``, passed through ``wikify_headers_spaces``,
     the body is replaced by the generated template, and the result is
     written with ``save_wiki_page``.  The generated template and the
     original body are also printed.

     :param page: object whose ``title`` attribute names the wiki page.
     :param lang: language code of the section; only ``'ru'`` is processed.
     :param content: wikitext to scan.
     :return: ``content`` unchanged when ``lang`` is not ``'ru'`` or nothing
         matched; otherwise the text that was saved last.
     """
     if lang != 'ru':
         return content
     # Capture everything between the section header and the next "===" header.
     parts = re.findall(u'=== ?Морфологические и синтаксические свойства ?===\n(.*?)\n===',
                        content, flags=re.DOTALL)
     if parts:
         for part in parts:
             part = part.strip()
             if not re.search(u'Наречие', part):
                 continue
             # Named groups used below:
             #   word        - bold headword split into syllables (optional)
             #   or/or1/or2  - alternative parts of speech after the adverb label
             #   sravn_value - comparative-degree form(s)
             #   other       - trailing paragraph starting with the morpheme
             #                 template or a prefix note
             # NOTE(review): the second character inside "[\s ]" may be a
             # non-breaking space - confirm before retyping this pattern.
             m = re.match(u"^(?P<slogi>('''|<b>)(?P<word>[^'<]*)('''|</b>)\s*\n\n)?Наречие(?P<or>(, (?P<or1>(вводн(ое|\.)|союзное) слово|союз|частица|предлог|числительное|предикатив))? или (?P<or2>(вводн(ое|\.)|союзное) слово|союз|частица|предлог|числительное|предикатив)|)([,;] неи[из]меняемое)?\.?(?P<sravn>( Ср(\.|авн(\.|ительная)) степень[\s ]?[-—:]?|, сравн. форма) ('''?|\[\[)?(?P<sravn_value>[^\n]+)('''?|\]\])?\.?\s*)?(?P<other>\s*\n\s*\n(\{\{морфо\s*\||приставка:).*)?$", part)
             if m:
                 self.counter += 1
                 other = m.group('other')
                 word = m.group('word')
                 # A headword that itself contains template markup is left alone.
                 if word and '{' in word:
                     continue
                 if word:
                     if '-' not in page.title:
                         # Title has no hyphen, so every hyphen in the headword
                         # is a syllable break: convert to template separators.
                         word = word.replace('-', '|').replace(u'·', u'|·|')
                         pass
                     else:
                         # Hyphenated title: keep the first hyphen literally and
                         # treat the remaining ones as syllable breaks.
                         # NOTE(review): the unpacking raises ValueError when the
                         # headword contains no hyphen at all - confirm inputs.
                         word = word.replace(u'—', u'-')
                         part1, part2 = word.split('-', 1)
                         word = "%s-%s" % (part1, part2.replace('-', '|'))
                 else:
                     # No syllabified headword in the text: fall back to the title.
                     word = page.title
                 slogi = u"|слоги={{по-слогам|%s}}\n" % word
                 or_value = m.group('or')
                 or1 = m.group('or1')
                 or2 = m.group('or2')
                 if or_value:
                     def get_value(or_val):
                         # Normalise two spellings, then return the value with
                         # its plural form ('' when there is no mapping).
                         if or_val == u'союзное слово':
                             or_val = u'союз'
                         if or_val == u'вводн. слово':
                             or_val = u'вводное слово'
                         plural = {
                             u'предикатив': u'предикативы',
                             u'частица': u'частицы',
                             u'вводное слово': u'вводные слова',
                             u'числительное': u'числительные',
                             u'предлог': u'предлоги',
                             u'союз': u'союзы',
                         }
                         return or_val, plural.get(or_val, '')
                     # The alternative after the conjunction fills the unnumbered
                     # parameters; the optional first alternative fills the "1" pair.
                     or2, or2p = get_value(or2)
                     or_params = u'|или=%s\n|или-кат=%s\n' % (or2, or2p)
                     if or1:
                         or1, or1p = get_value(or1)
                         or_params += u'|или1=%s\n|или-кат1=%s\n' % \
                                      (or1, or1p)
                 else:
                     or_params = u'|или=\n|или-кат=\n'
                 sravn = m.group('sravn_value')
                 if sravn:
                     # Trailing markup is cut in list order; every entry that
                     # matches the current tail is removed, so several entries
                     # may apply one after another.
                     remove_end_sravn = ["''.", ']]', "'''", "''", ".'''", "]].", '.']
                     for end in remove_end_sravn:
                         if sravn.endswith(end):
                             sravn = sravn[:-len(end)]
                     sravn = sravn.replace("'', ''", ", ").replace(';', ',')
                 else:
                     sravn = ''
                 sravn = u"|степень=%s\n" % sravn
                 print '=' * 100
                 print page.title
                 print '-' * 100
                 # Assemble the template call; the type and class parameters
                 # are emitted empty.
                 result = '{{adv ru\n'
                 result += slogi
                 result += u'|тип=\n'
                 result += u'|класс=\n'
                 result += sravn
                 result += or_params
                 result += '}}\n\n'
                 # Keep the trailing paragraph if there was one, otherwise add
                 # an empty morpheme template.
                 result += other.strip() if other else u'{{морфо||||}}'
                 print result
                 print '-' * 100
                 print part
                 print '=' * 100
                 print
                 pass
                 # Work on a freshly fetched copy of the page rather than on the
                 # text passed in; ``changed`` is not used.
                 content = get_wiki_page_content(page.title)
                 content, changed = wikify_headers_spaces(content)
                 content = content.replace(part, result)
                 desc = u'Использование шаблона "adv ru"'
                 save_wiki_page(page.title, content, desc, wait=5)
             else:
                 # Sections that do not fit the pattern are skipped silently.
                 pass
     return content

예제 #27

0

파일 보기

파일: templates.py 프로젝트: 2vitalik/words

 def get_templates(self, page_title):
     """Return the names listed as ``{{template|NAME}}`` on a wiki page.

     The page text is fetched with ``get_wiki_page_content``.  When the
     HTML entity ``&#061;`` is among the names, a literal ``=`` is
     appended to the returned list as well.
     """
     name_re = re.compile(u"\{\{template\|([^|}]+)\}\}")
     names = name_re.findall(get_wiki_page_content(page_title))
     if names.count("&#061;") > 0:
         names.append("=")
     return names

예제 #28

0

파일 보기

파일: tpl_categories.py 프로젝트: 2vitalik/words

def load2():
    prefix = u'Категория:Шаблоны словоизменений/'
    pages = Page.objects.prefetch_related('page_content').\
        filter(title__startswith=prefix)
    for page in pages:
        # print '=' * 100
        # print
        title = page.title
        suffix = title[len(prefix):]
        # print page.title
        # print suffix
        if not re.match('^[-a-z]+$', suffix):
            # print u'×' * 100
            continue
        # new_cat_title = u'Категория:Шаблоны:Словоизменение/%s' % suffix
        # new_content = u'[[Категория:Шаблоны:Словоизменение/По языкам|%s]]\n' \
        #               u'[[Категория:Шаблоны/%s|Словоизменение]]\n' \
        #               % (suffix, suffix)
        # save_wiki_page(new_cat_title, new_content,
        #                u"создание категорий")
        # print page.title
        print
        print suffix, get_edit_page_url(title)
        print suffix, get_edit_page_url(title.replace(
            u'Категория:Шаблоны словоизменений',
            u'Категория:Шаблоны:Словоизменение'))
        print '-' * 10
        # print 'ok'
        content = page.content
        # print '-' * 50
        # print content
        # continue
        for line in content.split('\n'):
            if not line.strip():
                continue
            m = re.match(u'^\[\[Категория:Шаблоны словоизменений по языкам(\|([-a-z]+))?\]\]$',
                         line)
            if m:
                print u'По языкам — ok'
                continue
                lang = m.group(2)
                if not lang:
                    print
                    print suffix
                    print line
                    print u'я →', lang
                    old_content = get_wiki_page_content(title)
                    new_content = old_content.replace(
                        line, u'[[Категория:Шаблоны словоизменений по языкам|%s]]' % suffix)
                    if old_content != new_content:
                        desc = u'приведение описания категорий к единому стилю'
                        save_wiki_page(title, new_content, desc, wait=5)
                        break
                    else:
                        print u'×', title
                    pass
                elif lang != suffix:
                    # print
                    # print suffix
                    # print line
                    # print u'я →', lang
                    pass
                else:
                    # print
                    # print suffix
                    # print line
                    # print u'я →', lang
                    pass
                continue
            m = re.match(u'^\[\[Категория:Шаблоны/([-a-z]+)(\|[*А-Яа-я ]+)?\]\]$',
                         line)
            if m:
                print u'Шаблоны — ok'
                continue
                lang = m.group(1)
                sort = m.group(2)
                # if lang != suffix or sort != u'|*Ш':
                if lang != suffix:
                    # print line
                    # print u'ш →', lang, u'—' ,sort
                    continue
                # else:
                # print line
                # print u'ш →', lang, u'—' ,sort
                old_content = get_wiki_page_content(title)
                new_content = old_content.replace(
                    line, u'[[Категория:Шаблоны/%s|Словоизменение]]' % lang)
                if old_content != new_content:
                    desc = u'приведение описания категорий к единому стилю'
                    save_wiki_page(title, new_content, desc, wait=5)
                    break
                else:
                    print u'×', title
                continue
            m = re.match(u'^\[\[Категория:Викисловарь:([-А-Яа-я ()]+)(\|[*А-Яа-я]+)?\]\]$',
                         line)
            if m:
                name = m.group(1)
                sort = m.group(2)
                print u'Категория — %s — ok' % name
                print '=' * 100
                print line
                print '=' * 100
                continue
            p = re.compile(u'^\[\[Категория:([-А-Яа-я ()]+)(\|[*А-Яа-я]+)?\]\]$')
            m = p.match(line)
            if m:
                name = m.group(1)
                sort = m.group(2)
                print u'Категория — %s — ×××?' % name
                # if u'[[Категория:Шаблоны/' not in content:
                #     old_content = get_wiki_page_content(title)
                #     new_content = re.sub(u'\[\[Категория:([-А-Яа-я ()]+)(\|[*А-Яа-я]+)?\]\]',
                #                          u'[[Категория:Шаблоны/%s|Словоизменение]]' % suffix,
                #                          old_content)
                #     if old_content != new_content:
                #         desc = u'замена основной категории языка на "Шаблоны/%s"' % suffix
                #         save_wiki_page(title, new_content, desc, wait=5)
                #         break
                #     else:
                #         print u'#' * 100, title
                #     pass
                continue
                # print line
                if sort:
                    if sort != u'|*Ш':
                        # print
                        # print suffix
                        # print line
                        # print u'к →', '...', u'—' , sort
                        pass
                else:
                    print
                    print suffix
                    print line
                    print u'к →', name, u'—' , sort
                continue
                # if sort != u'|*Ш':
                #     print line
                #     print u'к →', '...', u'—' ,sort
                continue
            m = re.match(u'^\[\[[a-z]+:([^]]+)]\]$',
                         line)
            if m:
                print 'lang', u'$' * 40
                print '=' * 100
                print line
                print '=' * 100
                # print u'l →', m.group(1)
                continue
            if line == u'[[Категория:Шаблоны словоизменений/zh]]':
                print 'base'
                print '=' * 100
                print line
                print '=' * 100
                # print u'с →', 'ok'
                continue
            print line, '×' * 100
            print '=' * 100
            print line
            print '=' * 100

예제 #29

0

파일 보기

파일: labels.py 프로젝트: 2vitalik/words

 def get_templates(self, page_title):
     """Return every NAME found as ``{{template|NAME}}`` in the page text.

     The text is obtained from ``get_wiki_page_content(page_title)``.
     """
     template_re = re.compile(u'\{\{template\|([^|}]+)\}\}')
     page_text = get_wiki_page_content(page_title)
     return template_re.findall(page_text)

예제 #30

0

파일 보기

파일: slogi.py 프로젝트: 2vitalik/words

    def action(self, page, **kwargs):
        """Collect pages whose syllable template disagrees with the page title.

        After delegating to the parent ``action``, the page is skipped when
        its title contains ``':'`` (presumably a namespaced page - confirm)
        or when its content has no syllables parameter.  For every value of
        that parameter which starts with the syllable template, the template
        argument is compared with the title via ``base_word``; on a mismatch
        ``out(title, argument)`` plus a newline is appended to
        ``self.content``.

        The page itself is never modified.

        :param page: object providing ``title`` and ``content``.
        :param kwargs: forwarded unchanged to the parent ``action``.
        :return: None
        """
        super(SlogiReplacer, self).action(page, **kwargs)
        title = page.title
        if ':' in title:
            return
        content = page.content
        if u"слоги=" not in content:
            return

        # Value must start with the syllable template ("по-слогам" or
        # "по слогам"); whatever follows the closing braces is ignored.
        template_re = re.compile(
            u"^\{\{по[- ]слогам\|(?P<value2>[^}]*)\}\}(?P<tail>.*)$")

        for value in re.findall(u"слоги=(?P<value>.*)\n", content):
            m = template_re.match(value)
            if not m:
                # Values that are not wrapped in the template produced no
                # output before either.
                # NOTE(review): base_word() is no longer called for them;
                # assumed to be free of side effects - confirm.
                continue
            syllables = m.group('value2')
            # An empty template argument is accepted as-is.
            if syllables and base_word(syllables) != base_word(title):
                self.content += out(title, syllables) + '\n'