def lang_action(self, page, lang, content):
    """Normalise empty category markers to the language-tagged template.

    An "empty" marker is one of ``{{cat}}``, ``{{categ}}`` or ``{{катег}}``
    (optionally followed by a ``|Глаголы`` argument and a ``lang=`` argument
    made of lowercase letters and hyphens, plus any trailing pipes), or the
    bare link ``[[Категория:]]``.  Each one is replaced with
    ``{{Категория|язык=<lang>|||}}``.

    Args:
        page: not used by this method.
        lang: language code inserted into the replacement template and
            forwarded to ``word_length_add_lang``.
        content: wiki text to process.

    Returns:
        The text unchanged when no marker was found; otherwise the
        substituted text after it has been passed through
        ``word_length_add_lang``.
    """
    empty_category = re.compile(u"\{\{(cat(eg)?|катег)(\|(Глаголы)?\|*lang=[-a-z]*)?\|*\}\}|\[\[Категория:\]\]")
    canonical = u"{{Категория|язык=%s|||}}" % lang
    updated = empty_category.sub(canonical, content)

    # Nothing was substituted: skip the post-processing step entirely.
    if updated == content:
        return updated

    return word_length_add_lang(updated, lang)
def lang_action(self, page, lang, content):
    """Rename legacy ``{{categ|...}}`` templates to ``{{Категория|...}}``.

    Every ``{{categ|...}}`` / ``{{катег|...}}`` template found in *content*
    is reported on stdout together with the page title.  Templates whose
    text does not already contain ``язык=`` are rewritten in place:
    the ``{{categ|`` prefix becomes ``{{Категория|`` and each ``|lang=``
    becomes ``|язык=``.  Templates that already contain ``язык=`` are left
    untouched.

    Args:
        page: object exposing a ``title`` attribute; used for logging only.
        lang: language code forwarded to ``word_length_add_lang``.
        content: wiki text to process.

    Returns:
        *content* unchanged when nothing was rewritten; otherwise the
        rewritten text after it has been passed through
        ``word_length_add_lang``.
    """
    pattern = re.compile(u"(\{\{(categ|катег)\|(?P<value>[^}]*)\|*\}\})")

    # The pattern has three groups, so findall() yields tuples; element 0 is
    # the outer group, i.e. the complete template text.
    templates = [found[0] for found in pattern.findall(content)]

    if templates:
        # Single-argument parenthesised prints behave the same under the
        # Python 2 print statement and the Python 3 print function.
        print(page.title)
        print("; ".join([repr(template) for template in templates]))
        print("")  # blank separator line

    new_content = content
    for template in templates:
        if u"язык=" in template:
            # Already uses the new parameter name; leave it alone.
            continue
        renamed = template.replace(u"{{categ|", u"{{Категория|").replace(u"|lang=", u"|язык=")
        new_content = new_content.replace(template, renamed)

    # Only run the post-processing step when something actually changed.
    if new_content != content:
        new_content = word_length_add_lang(new_content, lang)
    return new_content
def additional_change_content(self, content):
    """Apply the parent class's content change, then ``word_length_add_lang``.

    Args:
        content: text handed to the parent implementation.

    Returns:
        The result of ``word_length_add_lang`` called with the parent's
        output and ``self.lang``.
    """
    parent = super(ReplaceEmptyCategoryOld, self)
    inherited_result = parent.additional_change_content(content)
    return word_length_add_lang(inherited_result, self.lang)