def receive(self, signal, page):
    """Synchronise the stored ``WordInflection`` rows with a page event.

    Args:
        signal: Event name. ``'page_deleted'`` and ``'page_content_changed'``
            are handled; any other value is ignored.
        page: Object providing ``title`` (used as the ``word`` key) and
            ``content`` (the text scanned for inflection templates).

    Returns:
        None. On deletion every row for the word is removed. On a content
        change the rows are rebuilt only when the set of inflection
        templates found in the page differs from what is stored.
    """
    # Function-scope import: Counter is standard library (Python 2.7+).
    from collections import Counter

    word = page.title

    if signal == 'page_deleted':
        WordInflection.objects.filter(word=word).delete()
        return
    if signal != 'page_content_changed':
        return

    stored_tpls = [row.content
                   for row in WordInflection.objects.filter(word=word)]
    parts = get_inflection_templates(page.content, u'сущ', u'ru')
    fresh_tpls = [part['tpl'] for part in parts]

    # Order-insensitive comparison that still respects multiplicity: a plain
    # length + set check would treat [A, A, B] and [A, B, B] as identical and
    # leave stale rows behind.
    if Counter(stored_tpls) == Counter(fresh_tpls):
        return

    # Something changed: drop the old rows and rebuild from the page.
    WordInflection.objects.filter(word=word).delete()
    for part in parts:
        template_title = part['title'].strip()
        kind, gender, num = parse_template_title(template_title)
        WordInflection.objects.bulk_add(
            WordInflection(
                word=word,
                template=template_title,
                content=part['tpl'],
                gender=gender,
                kind=kind,
                num=num,
            )
        )
    # NOTE(review): the argument-less call presumably flushes the rows queued
    # above -- confirm against the manager's bulk_add implementation.
    WordInflection.objects.bulk_add()
    # Single-argument parenthesised form prints the same text as the
    # Python 2 print statement.
    print(' @ inflection changed')
def parse_titles(): for template in TemplateInflection.objects.filter(morph=u'сущ', lang='ru'): title = template.title print title kind, gender, num = parse_template_title(title) print kind, gender, num template.kind = kind template.gender = gender template.num = num template.save()
def process_template(article, lang): prefix = u'Шаблон:' title = article.title() if not title.startswith(prefix): print title, '-', 'BAD!', 'BAD!', '#' * 100 return title = title[len(prefix):] print title article = pywikibot.Page(site, u"Шаблон:%s" % title) content = article.get() edited = convert_wiki_date(article.editTime()) edited = make_aware(edited, pytz.UTC) words = title.split(' ') morph = words[0] if morph not in [u'adv', u'conj', u'interj', u'гл', u'глагол', u'мест', u'прил', u'сущ', u'числ', u'падежи', u'prep', u'affix', u'intro', u'phrase', u'suffix', u'predic', u'склонение', u'part', u'артикль', u'article', u'арт', u'деепр', u'onomatop', u'interj1', u'прич', u'герундий', u'склон', u'степени', u'междом', u'спряжения', u'спряжение', u'словоизм', u'сущ2', u'принад', u'palat', u'abbrev', u'measure', u'morph', u'prefix', u'ein', u'союз', u'словоформы', u'глаг', u'послел', u'послелог', u'падежи-мест', u'нар', u'морфема', u'межд', ]: print u'm →', title return prefix = u"%s %s" % (morph, lang) if not title.startswith(prefix): print u'e →', title, '(%s)' % lang return info = title[len(prefix):].strip() kind, gender, num = parse_template_title(title) return TemplateInflection( title=title, content=content, edited=edited, lang=lang, morph=morph, info=info, kind=kind, gender=gender, num=num, )