class ReplicsCounter():
    """Counts discussion replies ("replics") per section of an AFI page.

    Results are written to the ``articles`` table of the bot's ``Storage``
    cache, keyed by the section title.
    """

    def __init__(self):
        """Open the cache and make sure the ``articles`` table exists."""
        self.cache = Storage()
        self.cache.create(
            "articles",
            {"oldid": "INT UNIQUE", "name": "TEXT", "ts": "DATE",
             "replics": "INT"})

    @staticmethod
    def _countReplics(text):
        """Return the number of replies found in one section's text.

        Every line whose stripped form is longer than two characters counts,
        unless it starts with ``{{`` or ends with ``}}`` (template markup).
        The count starts at -1 because the section header line itself is
        among the counted lines.
        """
        replics = -1  # compensates for the header line
        for line in text.splitlines():
            sline = line.strip()
            if len(sline) <= 2:
                continue
            # The original tested sline[:-2] (everything *but* the last two
            # characters), so the "ends with }}" exclusion never applied.
            if sline.startswith("{{") or sline.endswith("}}"):
                continue
            replics += 1
        return replics

    def countPage(self, page):
        """Count replies in every section of an AFI page and store them.

        For each section an ``UPDATE articles SET replics = ...`` statement
        is executed with the section title as the article name, and a
        progress line is printed through ``wikipedia.output``.

        Assumes each item of ``page.getSections()`` carries the section's
        start offset at index 0 and its title at index 3 - that is how the
        items are used here; confirm against the framework.
        """
        sections = {}
        sect = None
        start = -1
        for s in page.getSections():
            if sect is not None:
                # the previous section ends where this one begins
                sections[sect] = (start, s[0])
            sect = s[3]
            start = s[0]
        if sect is None:
            # No sections at all: nothing to count, and there is no
            # section title to update in the cache.
            return
        content = page.get()
        sections[sect] = (start, len(content))  # last section runs to the end

        for name, bounds in sections.items():
            replics = self._countReplics(content[bounds[0]:bounds[1]])
            wikipedia.output(u"%s %s %s" % (name, bounds, replics))
            # NOTE(review): SQL is built by string formatting and relies on
            # Storage.quote() for escaping; switch to bound parameters if
            # Storage.execute() supports them.
            self.cache.execute(
                u'UPDATE articles SET replics = %s WHERE name = "%s";'
                % (replics, self.cache.quote(name)))

    def countCat(self, catname):
        """Run countPage() on every article of the category ``catname``."""
        cat = catlib.Category(wikipedia.getSite(), catname)
        for page in cat.articles():
            print(page)
            self.countPage(page)

    def replicsPage(self, pagename):
        """Return the cached reply count for ``pagename``.

        Returns the string ``"-"`` when the cache has no row for that name.
        """
        r = self.cache.findone('articles', {"name": pagename},
                               what=["replics"])
        if r is None:
            return "-"
        return r[0]
class AllAFI:
    """Module for AFI stats update.

    Mirrors the articles of the "articles for urgent improvement" category
    into the local cache and publishes summary statistics to a wiki page.
    """

    def __init__(self, action):
        """Remember the requested ``action`` and open site, category, cache."""
        self.action = action
        self.site = wikipedia.getSite()
        self.afi = catlib.Category(
            self.site,
            u'Категория:Википедия:Статьи для срочного улучшения')
        self.afi_list = []
        self.afi_list_title = []
        self.cache = Storage()

    def load_all(self):
        """Load all articles for improvement into the cache tables.

        Rebuilds the ``category`` table (article title -> category title)
        from scratch, then deletes rows of ``articles`` whose name is no
        longer among the category's current article titles.
        """
        self.cache.create('category', {'name': 'TEXT', 'cat': 'TEXT'})
        self.cache.delete('category')

        # Fetch the category listing once and reuse it for the titles.
        self.afi_list = self.afi.articlesList()
        self.afi_list_title = [
            self.cache.quote(page.title(withNamespace=False))
            for page in self.afi_list]

        for article in self.afi_list:
            wikipedia.output(article)
            name = article.title(withNamespace=False)
            for cat in article.categories():
                self.cache.insert(
                    'category', (name, cat.title(withNamespace=False)))

        # Now clear the articles table of articles that are no longer listed.
        current_titles = set(self.afi_list_title)
        rows = self.cache.cursor.execute(u"SELECT name FROM articles;")
        for row in rows.fetchall():
            if row[0] not in current_titles:
                wikipedia.output(row[0])
                self.cache.delete('articles', {'name': row[0]})

    def _build_stats_text(self):
        """Return the wikitext of the statistics page built from the cache."""
        cursor = self.cache.cursor
        parts = []

        n1 = cursor.execute(
            "SELECT count(DISTINCT name) FROM category;").fetchone()[0]
        n2 = cursor.execute("SELECT count(*) FROM articles;").fetchone()[0]
        parts.append(
            u"Всего статей на КУЛ: '''%s''', статей в базе бота '''%s''' \r\n"
            % (n1, n2))

        rows = cursor.execute(
            "SELECT cat, count(*) AS c FROM category GROUP BY cat HAVING c>10 ORDER BY c DESC;")
        parts.append(
            u"== Топ категорий <ref>Категории, в которых более 10 статей на улучшении, количество статей указано в скобках</ref> == \r\n")
        for row in rows.fetchall():
            parts.append(u"* [[:Категория:%s|]]: (%s) \r\n" % row)

        parts.append(
            u"== Самые старые статьи <ref>Учитывается самая первая номинация КУЛ</ref> == \r\n")
        rows = cursor.execute(
            u"SELECT name, ts FROM articles ORDER BY ts limit 20;")
        for row in rows.fetchall():
            parts.append(u"* [[%s]] (%s) \r\n" % row)

        rows = cursor.execute(
            "SELECT count(*), replics FROM articles GROUP BY replics;")
        # The heading must stay on one line: a line break inside "== ... =="
        # is not a valid wiki heading (nor a valid one-line string literal).
        parts.append(u"== По количеству реплик == \r\n")
        for row in rows.fetchall():
            parts.append(u"* Обсуждения %s статей имеют %s реплик\r\n" % row)

        rows = cursor.execute(
            "SELECT topic, topic, n, ts FROM updates ORDER BY n DESC;")
        parts.append(u"== Последние обновления == \r\n")
        for row in rows.fetchall():
            parts.append(
                u"* [[Википедия:К улучшению/Тематические обсуждения/%s|%s]]: (Статей %s, обновлена %s) \r\n"
                % row)

        parts.append(u"== Примечания ==\r\n{{примечания}}")
        return u"".join(parts)

    def update_stats(self):
        """Build the statistics text and save it to the wiki stats page."""
        text = self._build_stats_text()
        P = wikipedia.Page(
            self.site,
            u"Википедия:К улучшению/Тематические обсуждения/Статистика")
        P.put(text, u"Обновление статистики", botflag=True)

    def run(self):
        """Entry point: optionally reload the cache, then publish stats."""
        if self.action == "all":
            self.load_all()
        # NOTE(review): statistics are published for every action; only the
        # reload is conditional - confirm this matches the intended layout.
        self.update_stats()