Exemplo n.º 1
0
    def tpl_action(self, page, tpl, title, morph, lang, params):
        # if self.i > 20000:
        #     self.stop()
        tpl_title = title.strip()
        # print page.title, '-', title
        # print params
        call_params, call_numeric = process_call_params(params)
        # for key, value in call_params.items():
        #     print key
        mojo = u'можо' in call_params
        rod = u'род' in call_params

        def add_key_value(key, value, page_title):
            report.setdefault(tpl_title, dict())
            report[tpl_title].setdefault(key, dict())
            report[tpl_title][key].setdefault(value, list())
            report[tpl_title][key][value].append(page_title)

        if mojo or rod:
            print page.title, '-', tpl_title
            if mojo:
                # print u'можо =', call_params[u'можо']
                # self.mojos.append(tpl_title)
                add_key_value(key=u'можо', value=call_params[u'можо'],
                              page_title=page.title)
            if rod:
                # print u'род =', call_params[u'род']
                # self.rods.append(tpl_title)
                add_key_value(key=u'род', value=call_params[u'род'],
                              page_title=page.title)
Exemplo n.º 2
0
 def tpl_action(self, page, old_tpl, title, morph, lang, params):
     call_params, call_numeric = process_call_params(params)
     if u"соотв" in call_params:
         if not call_params[u"соотв"]:
             return
         # print '-' * 40
         print page.title, " -> ", call_params[u"соотв"]
Exemplo n.º 3
0
 def make_changes(self, page, old_tpl, title, morph, lang, params):
     tpl_title = title.strip()
     call_params, call_numeric = process_call_params(params)
     if tpl_title in selected_tpls and u'можо' in call_params:
         print page.title, tpl_title
         old_value = call_params[u'можо']
         print u'можо =', old_value
         if old_value == u'можо':
             new_value = u'общ'
         elif old_value == u'мо' or old_value == u'м':
             new_value = u'муж'
         elif old_value == u'мосо':
             new_value = u'мс'
         else:
             raise Exception('Never should happen')
         return old_tpl.replace(u'|можо=%s\n' % old_value,
                                u'|род=%s\n' % new_value)
     return old_tpl
Exemplo n.º 4
0
 def tpl_action(self, page, tpl, title, morph, lang, params):
     if self.i > 10000:
         self.stop()
     title = title.strip()
     print page.title, '-', title
     # print params
     self.table.setdefault(title, dict(count=0, mojo_count=0,
                                       params=set(), mojo_values=set()))
     self.table[title]["count"] += 1
     call_params, call_numeric = process_call_params(params)
     mojo = u'можо' in call_params
     # rod = u'род' in call_params
     if mojo:
         self.table[title]["mojo_count"] += 1
         self.table[title]["mojo_values"].add(call_params[u'можо'])
     for key, value in call_params.items():
         print key
         self.table[title]["params"].add(key)
     print
Exemplo n.º 5
0
    def tpl_action(self, page, tpl, title, morph, lang, params):
        """Generate word-form records for one inflection template call.

        Steps, as visible in this method:

        1. Look up ``TemplateInflection`` by the stripped template title.
           Return silently if it does not exist; print a warning and return
           if it has no ``forms``.
        2. Parse ``template.forms`` and ``template.params`` with
           ``get_dict_from_text`` and run every value through
           ``universal_process_template`` with the call parameters (forms
           additionally go through ``process_template`` and
           ``divide_words``).
        3. For every form key, collect ``value -> [form params, ...]``,
           skipping values whose unstressed spelling equals ``page.title``.
        4. Call ``join_form_results`` on the collected mapping.
        5. Render a ``{{Форма-<morph>}}`` wikitext template for each
           remaining entry, wrap it in an unsaved ``WordForm`` and insert all
           of them with a single ``bulk_create``.

        ``tpl`` and ``lang`` are not used here; ``call_numeric`` is parsed
        but unused.  Returns None.

        NOTE(review): ``TemplateInflection`` / ``WordForm`` look like Django
        models (``.objects.get`` / ``bulk_create``, ``ObjectDoesNotExist``)
        -- confirm against the imports of this file.
        """
        title = title.strip()
        # Disabled earlier experiments (removed commented-out code):
        #  - printing pages that use a fixed list of "empty" noun templates;
        #  - restricting processing to morph == u'сущ';
        #  - accumulating output in self.content and saving it to a wiki page
        #    every 50 processed templates (self.counter / self.n).
        call_params, call_numeric = process_call_params(params)
        try:
            template = TemplateInflection.objects.get(title=title)
        except ObjectDoesNotExist:
            return  # todo: process templates redirects ("гл ru 4b-бСВ" → "гл ru 4b-лСВ")
        if not template.forms:
            print title, '- maybe wrong template'
            return
        print title
        tpl_forms = get_dict_from_text(template.forms)
        tpl_params = get_dict_from_text(template.params)
        # Template parameters are resolved first because the form values
        # below are processed against the already-resolved tpl_params.
        for key, value in tpl_params.items():
            tpl_params[key] = universal_process_template(value, call_params)
        for key, value in tpl_forms.items():
            value = universal_process_template(value, call_params)
            value = process_template(value, tpl_params, key, morph)
            # From here on each form entry is a sequence of values (it is
            # indexed and iterated below).
            value = divide_words(value)
            tpl_forms[key] = value

        form_results = dict()

        # NOTE(review): this loop currently changes nothing -- the check is
        # a placeholder for the todo below.  It still evaluates values[0]
        # (which raises IndexError for an empty sequence) and calls
        # remove_stress() for the 'nom-sg' form of nouns.
        for key, values in tpl_forms.items():
            if morph == u'сущ':
                if key == 'nom-sg':
                    value = values[0]
                    if remove_stress(value) and page.title != remove_stress(value):
                        pass  # todo: fix these mismatches after all
                        # (disabled debug output of the mismatching page
                        # title / unstressed nominative singular)

        for key, values in tpl_forms.items():
            # todo: find values that contain latin letters [a-z]
            # `and` binds tighter than `or`: skip when there are no values
            # at all, or when the only value is empty.
            if not values or len(values) == 1 and not values[0]:
                continue
            form_params = get_form_params(morph, key, tpl_params)
            if not form_params:
                continue
            for value in values:
                # A form spelled exactly like the page title is not recorded.
                if remove_stress(value) == page.title:
                    continue
                form_results.setdefault(value, list())
                # Each entry gets its own copy of the params dict --
                # presumably so join_form_results can modify entries
                # independently; confirm.
                form_results[value].append(form_params.copy())

        # todo: if entries match completely, delete those as well
        # The return value is ignored, so join_form_results presumably
        # modifies form_results in place -- verify against its definition.
        join_form_results(morph, form_results)

        db_forms = list()
        for value, items in form_results.items():
            for form_params in items:
                # Empty entries are skipped -- possibly emptied by
                # join_form_results; confirm.
                if not form_params:
                    continue
                form_template = u"{{Форма-%s\n|язык=ru\n|база=%s\n" % (morph, page.title)
                for param_name, param_value in form_params.items():
                    form_template += "|%s=%s\n" % (param_name, param_value)
                form_template += u"|слоги={{по-слогам|%s}}\n" % value
                form_template += "}}"

                # Disabled alternatives (removed commented-out code):
                # appending the rendered template to self.content, and
                # creating rows one by one with WordForm.objects.create /
                # get_or_create.

                # Build the object without saving; everything is inserted in
                # one bulk_create call after the loops.
                db_form = WordForm(
                    title=remove_stress(value),
                    base=page.title,
                    value=value,
                    template=form_template
                )
                db_forms.append(db_form)

                # Disabled: flushing db_forms in batches of ~1000 with a
                # progress printout.
        WordForm.objects.bulk_create(db_forms)